2008-11-20 11:36:56

by Tetsuo Handa

[permalink] [raw]
Subject: [TOMOYO #13 (mmotm 2008-11-19-02-19) 01/11] Introduce security_path_clear() hook.

To perform DAC performed in vfs_foo() before MAC, we let security_path_foo()
save a result into our own hash table and return 0, and let security_inode_foo()
return the saved result. Since security_inode_foo() is not always called after
security_path_foo(), we need security_path_clear() to clear the hash table.

Signed-off-by: Kentaro Takeda <[email protected]>
Signed-off-by: Tetsuo Handa <[email protected]>
Signed-off-by: Toshiharu Harada <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Crispin Cowan <[email protected]>
Cc: Stephen Smalley <[email protected]>
Cc: Casey Schaufler <[email protected]>
Cc: James Morris <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

fs/namei.c | 9 +++++++++
fs/open.c | 2 ++
include/linux/security.h | 12 ++++++++++++
net/unix/af_unix.c | 1 +
security/capability.c | 5 +++++
security/security.c | 6 ++++++
6 files changed, 35 insertions(+)

--- linux-2.6.28-rc5-mm1.orig/fs/namei.c
+++ linux-2.6.28-rc5-mm1/fs/namei.c
@@ -1566,6 +1566,7 @@ int may_open(struct nameidata *nd, int a
error = do_truncate(dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
NULL);
+ security_path_clear();
}
put_write_access(inode);
if (error)
@@ -1594,6 +1595,7 @@ static int __open_namei_create(struct na
if (error)
goto out_unlock;
error = vfs_create(dir->d_inode, path->dentry, mode, nd);
+ security_path_clear();
out_unlock:
mutex_unlock(&dir->d_inode->i_mutex);
dput(nd->path.dentry);
@@ -2022,6 +2024,7 @@ asmlinkage long sys_mknodat(int dfd, con
error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
break;
}
+ security_path_clear();
out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
@@ -2086,6 +2089,7 @@ asmlinkage long sys_mkdirat(int dfd, con
if (error)
goto out_drop_write;
error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+ security_path_clear();
out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
@@ -2200,6 +2204,7 @@ static long do_rmdir(int dfd, const char
if (error)
goto exit4;
error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+ security_path_clear();
exit4:
mnt_drop_write(nd.path.mnt);
exit3:
@@ -2289,6 +2294,7 @@ static long do_unlinkat(int dfd, const c
if (error)
goto exit3;
error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+ security_path_clear();
exit3:
mnt_drop_write(nd.path.mnt);
exit2:
@@ -2374,6 +2380,7 @@ asmlinkage long sys_symlinkat(const char
if (error)
goto out_drop_write;
error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
+ security_path_clear();
out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
@@ -2475,6 +2482,7 @@ asmlinkage long sys_linkat(int olddfd, c
if (error)
goto out_drop_write;
error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
+ security_path_clear();
out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
@@ -2715,6 +2723,7 @@ asmlinkage long sys_renameat(int olddfd,
goto exit6;
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
+ security_path_clear();
exit6:
mnt_drop_write(oldnd.path.mnt);
exit5:
--- linux-2.6.28-rc5-mm1.orig/fs/open.c
+++ linux-2.6.28-rc5-mm1/fs/open.c
@@ -277,6 +277,7 @@ static long do_sys_truncate(const char _
if (!error) {
DQUOT_INIT(inode);
error = do_truncate(path.dentry, length, 0, NULL);
+ security_path_clear();
}

put_write_and_out:
@@ -335,6 +336,7 @@ static long do_sys_ftruncate(unsigned in
ATTR_MTIME|ATTR_CTIME, file);
if (!error)
error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+ security_path_clear();
out_putf:
fput(file);
out:
--- linux-2.6.28-rc5-mm1.orig/include/linux/security.h
+++ linux-2.6.28-rc5-mm1/include/linux/security.h
@@ -527,6 +527,12 @@ static inline void security_free_mnt_opt
* @inode contains a pointer to the inode.
* @secid contains a pointer to the location where result will be saved.
* In case of failure, @secid will be set to zero.
+ * @path_clear:
+ * Clear error code stored by security_path_*() in case
+ * security_inode_*() was not called when DAC returned an error.
+ * This hook allows LSM modules which use security_path_*() defer
+ * returning LSM's error code till security_inode_*() is called so that
+ * DAC's error (if any) is returned to the caller instead of LSM's error.
*
* Security hooks for file operations
*
@@ -1402,6 +1408,7 @@ struct security_operations {
struct dentry *new_dentry);
int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
struct path *new_dir, struct dentry *new_dentry);
+ void (*path_clear) (void);
#endif

int (*inode_alloc_security) (struct inode *inode);
@@ -2792,6 +2799,7 @@ int security_path_link(struct dentry *ol
struct dentry *new_dentry);
int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
struct path *new_dir, struct dentry *new_dentry);
+void security_path_clear(void);
#else /* CONFIG_SECURITY_PATH */
static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
{
@@ -2842,6 +2850,10 @@ static inline int security_path_rename(s
{
return 0;
}
+
+static inline void security_path_clear(void)
+{
+}
#endif /* CONFIG_SECURITY_PATH */

#ifdef CONFIG_KEYS
--- linux-2.6.28-rc5-mm1.orig/net/unix/af_unix.c
+++ linux-2.6.28-rc5-mm1/net/unix/af_unix.c
@@ -832,6 +832,7 @@ static int unix_bind(struct socket *sock
if (err)
goto out_mknod_drop_write;
err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+ security_path_clear();
out_mknod_drop_write:
mnt_drop_write(nd.path.mnt);
if (err)
--- linux-2.6.28-rc5-mm1.orig/security/capability.c
+++ linux-2.6.28-rc5-mm1/security/capability.c
@@ -308,6 +308,10 @@ static int cap_path_truncate(struct path
{
return 0;
}
+
+static void cap_path_clear(void)
+{
+}
#endif

static int cap_file_permission(struct file *file, int mask)
@@ -939,6 +943,7 @@ void security_fixup_ops(struct security_
set_to_cap_if_null(ops, path_link);
set_to_cap_if_null(ops, path_rename);
set_to_cap_if_null(ops, path_truncate);
+ set_to_cap_if_null(ops, path_clear);
#endif
set_to_cap_if_null(ops, file_permission);
set_to_cap_if_null(ops, file_alloc_security);
--- linux-2.6.28-rc5-mm1.orig/security/security.c
+++ linux-2.6.28-rc5-mm1/security/security.c
@@ -419,6 +419,12 @@ int security_path_truncate(struct path *
return 0;
return security_ops->path_truncate(path, length, time_attrs, filp);
}
+
+void security_path_clear(void)
+{
+ return security_ops->path_clear();
+}
+EXPORT_SYMBOL(security_path_clear);
#endif

int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)

--


2008-12-01 20:03:45

by Stephen Smalley

[permalink] [raw]
Subject: Re: [TOMOYO #13 (mmotm 2008-11-19-02-19) 01/11] Introduce security_path_clear() hook.

On Thu, 2008-11-20 at 20:25 +0900, Tetsuo Handa wrote:
> plain text document attachment (introduce-security_path_clear.patch)
> To perform DAC performed in vfs_foo() before MAC, we let security_path_foo()
> save a result into our own hash table and return 0, and let security_inode_foo()
> return the saved result. Since security_inode_foo() is not always called after
> security_path_foo(), we need security_path_clear() to clear the hash table.

This seems very fragile and unmaintainable to me. The fact that you
even need a security_path_clear() hook suggests that something is wrong
with the other security_path* hooks. I'd suggest that you explicitly
pass the result of the security_path* hooks down to the security_inode*
hooks instead. What do others think?

--
Stephen Smalley
National Security Agency

2008-12-02 10:40:19

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [TOMOYO #13 (mmotm 2008-11-19-02-19) 01/11] Introduce security_path_clear() hook.

Hello.

Stephen Smalley wrote:
> On Thu, 2008-11-20 at 20:25 +0900, Tetsuo Handa wrote:
> > plain text document attachment (introduce-security_path_clear.patch)
> > To perform DAC performed in vfs_foo() before MAC, we let security_path_foo()
> > save a result into our own hash table and return 0, and let security_inode_foo()
> > return the saved result. Since security_inode_foo() is not always called after
> > security_path_foo(), we need security_path_clear() to clear the hash table.
>
> This seems very fragile and unmaintainable to me. The fact that you
> even need a security_path_clear() hook suggests that something is wrong
> with the other security_path* hooks. I'd suggest that you explicitly
> pass the result of the security_path* hooks down to the security_inode*
> hooks instead. What do others think?

You are recommending us to pass variables required for security_inode_*() via
stack memory rather than private hash, aren't you?
I think there are two problems.

One is that the variable passed via stack memory won't be used by SELinux and
SMACK and "CONFIG_SECURITY=n kernels", which will be a waste of stack memory.

The other one is that TOMOYO will need another variable for telling how the
security_inode_*() are called. Passing the variable via stack memory requires
modification of all vfs_*() calls, but TOMOYO doesn't check requests issued
by (e.g.) stackable filesystems.

By the way, this security_path_clear() is intended to be able to return DAC's
error code in priority to MAC's error code, but there are two problems for
TOMOYO.
One is that pathnames which will be later denied by DAC are appended by
TOMOYO's learning mode (i.e. garbage entries appears in the learned policy).
The other is that warning messages on pathnames which will be later denied by
DAC are generated by TOMOYO's enforcing mode.

Thus, it will be preferable for TOMOYO to "do MAC checks after DAC checks"
rather than to "return DAC's error in priority to MAC's error while doing MAC
checks before DAC checks".

To do so, "security_path_*() should be replaced by security_path_set(vfsmount)"
and "let security_inode_*() do MAC checks using the result of
security_path_set()" and "let security_path_clear() clear the result of
security_path_set() in case security_inode_*() was not called".

So, I think storing the pathname of "struct vfsmount" in the form of "char *"
into private hash at security_path_set() and clearing the private hash at
security_path_clear() should be most preferable.

Regards.

2008-12-02 13:52:35

by Stephen Smalley

[permalink] [raw]
Subject: Re: [TOMOYO #13 (mmotm 2008-11-19-02-19) 01/11] Introduce security_path_clear() hook.

On Tue, 2008-12-02 at 19:39 +0900, Tetsuo Handa wrote:
> Hello.
>
> Stephen Smalley wrote:
> > On Thu, 2008-11-20 at 20:25 +0900, Tetsuo Handa wrote:
> > > plain text document attachment (introduce-security_path_clear.patch)
> > > To perform DAC performed in vfs_foo() before MAC, we let security_path_foo()
> > > save a result into our own hash table and return 0, and let security_inode_foo()
> > > return the saved result. Since security_inode_foo() is not always called after
> > > security_path_foo(), we need security_path_clear() to clear the hash table.
> >
> > This seems very fragile and unmaintainable to me. The fact that you
> > even need a security_path_clear() hook suggests that something is wrong
> > with the other security_path* hooks. I'd suggest that you explicitly
> > pass the result of the security_path* hooks down to the security_inode*
> > hooks instead. What do others think?
>
> You are recommending us to pass variables required for security_inode_*() via
> stack memory rather than private hash, aren't you?

To be precise, I was recommending passing the return value of
security_path* down to security_inode* explicitly rather than doing it
implicitly as you presently do. Thereby making the actual control flow
and relationship between the security_path* and security_inode* hooks
evident. However, I guess that is moot given your statements below.

> I think there are two problems.
>
> One is that the variable passed via stack memory won't be used by SELinux and
> SMACK and "CONFIG_SECURITY=n kernels", which will be a waste of stack memory.

I'm more concerned with the hook interface being understandable and
maintainable.

> The other one is that TOMOYO will need another variable for telling how the
> security_inode_*() are called. Passing the variable via stack memory requires
> modification of all vfs_*() calls, but TOMOYO doesn't check requests issued
> by (e.g.) stackable filesystems.

I'm not clear on why that requires a separate argument; if the caller is
passing in the access decision result as an input, then certain callers
(e.g. stackable filesystems) can always pass 0 (success).

> By the way, this security_path_clear() is intended to be able to return DAC's
> error code in priority to MAC's error code, but there are two problems for
> TOMOYO.
> One is that pathnames which will be later denied by DAC are appended by
> TOMOYO's learning mode (i.e. garbage entries appears in the learned policy).
> The other is that warning messages on pathnames which will be later denied by
> DAC are generated by TOMOYO's enforcing mode.
>
> Thus, it will be preferable for TOMOYO to "do MAC checks after DAC checks"
> rather than to "return DAC's error in priority to MAC's error while doing MAC
> checks before DAC checks".

It sounds like the existing security_path* hooks are not adequate for
your needs then, and that patch should not in fact be merged. Yes?

> To do so, "security_path_*() should be replaced by security_path_set(vfsmount)"
> and "let security_inode_*() do MAC checks using the result of
> security_path_set()" and "let security_path_clear() clear the result of
> security_path_set() in case security_inode_*() was not called".
>
> So, I think storing the pathname of "struct vfsmount" in the form of "char *"
> into private hash at security_path_set() and clearing the private hash at
> security_path_clear() should be most preferable.

Then I guess you need to redo your patches along those lines and
re-submit them. Likely starting with just a patch adding the
security_path_set/clear hooks, posted to lsm and fsdevel.

--
Stephen Smalley
National Security Agency

2008-12-03 08:49:34

by Kentaro Takeda

[permalink] [raw]
Subject: Re: [TOMOYO #13 (mmotm 2008-11-19-02-19) 01/11] Introduce security_path_clear() hook.

Stephen Smalley wrote:
> To be precise, I was recommending passing the return value of
> security_path* down to security_inode* explicitly rather than doing it
> implicitly as you presently do. Thereby making the actual control flow
> and relationship between the security_path* and security_inode* hooks
> evident. However, I guess that is moot given your statements below.
I think so too.

>> I think there are two problems.
>>
>> One is that the variable passed via stack memory won't be used by SELinux and
>> SMACK and "CONFIG_SECURITY=n kernels", which will be a waste of stack memory.
>
> I'm more concerned with the hook interface being understandable and
> maintainable.
I see.

>> The other one is that TOMOYO will need another variable for telling how the
>> security_inode_*() are called. Passing the variable via stack memory requires
>> modification of all vfs_*() calls, but TOMOYO doesn't check requests issued
>> by (e.g.) stackable filesystems.
>
> I'm not clear on why that requires a separate argument; if the caller is
> passing in the access decision result as an input, then certain callers
> (e.g. stackable filesystems) can always pass 0 (success).
If we use stack memory to pass the access decision result from security_path_*()
to security_inode_*(), this method seems possible.

>> By the way, this security_path_clear() is intended to be able to return DAC's
>> error code in priority to MAC's error code, but there are two problems for
>> TOMOYO.
>> One is that pathnames which will be later denied by DAC are appended by
>> TOMOYO's learning mode (i.e. garbage entries appears in the learned policy).
>> The other is that warning messages on pathnames which will be later denied by
>> DAC are generated by TOMOYO's enforcing mode.
>>
>> Thus, it will be preferable for TOMOYO to "do MAC checks after DAC checks"
>> rather than to "return DAC's error in priority to MAC's error while doing MAC
>> checks before DAC checks".
>
> It sounds like the existing security_path* hooks are not adequate for
> your needs then, and that patch should not in fact be merged. Yes?
Sorry for confusing you. security_path_*() hooks are adequate for TOMOYO
functionality itself. But they are inadequate for performing DAC before MAC,
which we eventually want to do. We've reached this "passing vfsmount's pathname"
approach after proposing the previous patch. The new approach is a little
divergence of the existing approach, which Serge has patiently advised us.
It should be more suitable for DAC-before-MAC than the previous patch, we think.

>> To do so, "security_path_*() should be replaced by security_path_set(vfsmount)"
>> and "let security_inode_*() do MAC checks using the result of
>> security_path_set()" and "let security_path_clear() clear the result of
>> security_path_set() in case security_inode_*() was not called".
>>
>> So, I think storing the pathname of "struct vfsmount" in the form of "char *"
>> into private hash at security_path_set() and clearing the private hash at
>> security_path_clear() should be most preferable.
>
> Then I guess you need to redo your patches along those lines and
> re-submit them. Likely starting with just a patch adding the
> security_path_set/clear hooks, posted to lsm and fsdevel.
I'll post it soon.

If we pass the access decision result through stack memory, we don't need
security_path_clear() as you mentioned. However, if we pass the vfsmount's
pathname, security_path_clear() is still needed in order to free the pathname.

Regards,

2008-12-03 08:56:53

by Kentaro Takeda

[permalink] [raw]
Subject: [PATCH (mmotm-2008-12-02-17-08)] Introduce security_path_set/clear() hooks.

Stephen, Serge,
Here is the patch for introducing new security_path_set()/clear() hooks.

This patch enables LSM module to remember vfsmount's pathname so that it can
calculate absolute pathname in security_inode_*(). Since actual MAC can be
performed after DAC, there will not be any noise in auditing and learning
features. This patch currently assumes that the vfsmount's pathname is stored in
hash table in LSM module. (Should I use stack memory?)

Since security_inode_*() are not always called after security_path_set(),
security_path_clear() hook is needed to free the remembered pathname.

Andrew,
If lsm and fs guys accept this patch, please replace
introduce-new-lsm-hooks-where-vfsmount-is-available.patch with this patch.


Otherwise, if we could call DAC functions, such as may_create(), in LSM module,
security_path_clear() hook would not be needed. This approach is simple, but it
might be objected because of layering. ;-)

Regards,

----- What is this patch for? -----

There are security_inode_*() LSM hooks for attribute-based MAC, but they are not
suitable for pathname-based MAC because they don't receive "struct vfsmount"
information.

----- How this patch was developed? -----

Two pathname-based MACs, AppArmor and TOMOYO Linux, are trying to merge
upstream. But because of "struct vfsmount" problem, they have been unable to
merge upstream.

Here are the list of approaches and the reasons of denial.

(1) Not using LSM
http://lwn.net/Articles/277833/

This approach was rejected because security modules should use LSM because the
whole idea behind LSM was to have a single set of hooks for all security
modules; if every module now adds its own set of hooks, that purpose will have
been defeated and the kernel will turn into a big mess of security hooks.

(2) Retrieving "struct vfsmount" from "struct task_struct".
http://lkml.org/lkml/2007/11/5/388

Since "struct task_struct" contains list of "struct vfsmount",
"struct vfsmount" which corresponds to "struct dentry" can be retrieved from
the list unless "mount --bind" is used.

This approach turned out to cause a critical problem that getting namespace_sem
lock from security_inode_*() triggers AB-BA deadlock.

(3) Adding "struct vfsmount" parameter to VFS helper functions.
http://lkml.org/lkml/2008/5/29/207

This approach adds "struct vfsmount" to VFS helper functions (e.g. vfs_mkdir()
and vfs_symlink()) and LSM hooks inside VFS helper functions. This approach is
helpful for not only AppArmor and TOMOYO Linux 2.x but also SELinux and
auditing purpose, for this approach allows existent LSM users to use pathnames
in their access control and audit logs.

This approach was rejected by Al Viro, the VFS maintainer, because he thinks
individual filesystem should remain "struct vfsmount"-unaware and VFS helper
functions should not receive "struct vfsmount".

Al Viro also suggested to move existing security_inode_*() to out of VFS
helper functions so that security_inode_*() can receive "struct vfsmount"
without modifying VFS helper functions, but this suggestion was opposed by
Stephen Smalley because changing the order of permission checks (i.e.
MAC checks before DAC checks) is not acceptable.

(4) Passing "struct vfsmount" via "struct task_struct".
http://lkml.org/lkml/2007/11/16/157

Since we didn't understand the reason why accessing "struct vfsmount" from
LSM hooks inside VFS helper functions is not acceptable, we thought the reason
why VFS helper functions don't receive "struct vfsmount" is the amount of
modifications needed to do so. Thus, we proposed to pass "struct vfsmount" via
"struct task_struct" so that modifications remain minimal.

This approach was rejected because this is an abuse of "struct task_struct".

(5) Remembering pathname of "struct vfsmount" via "struct task_struct".
http://lkml.org/lkml/2008/8/19/16

Since pathname of a "struct dentry" up to the mount point can be calculated
without "struct vfsmount", absolute pathname of a "struct dentry" can be
calculated if "struct task_struct" can remember absolute pathname of a
"struct vfsmount" which corresponds to "struct dentry".
As we now understand that Al Viro is opposing to access "struct vfsmount" from
LSM hooks inside VFS helper functions, we gave up delivering "struct vfsmount"
to LSM hooks inside VFS helper functions.
Kernel 2.6.26 introduced read-only bind mount feature, and hooks for that
feature (i.e. mnt_want_write() and mnt_drop_write()) were inserted around
VFS helper functions call. Since mnt_want_write() receives "struct vfsmount"
which corresponds to "struct dentry" that will be passed to subsequent VFS
helper functions call, we associated pathname of "struct vfsmount" with
"struct task_struct" instead of associating "struct vfsmount" itself.

This approach was not explicitly rejected, but there seems to be performance
problem.

(6) Introducing new LSM hooks.
http://lkml.org/lkml/2008/9/24/48

We understand that adding new LSM hooks which receive "struct vfsmount" outside
VFS helper functions is the most straightforward approach. This approach has
less impact to existing LSM module and no impact to VFS helper functions.

(7) Remembering pathname of "struct vfsmount" via new LSM hooks.
(this patch)

We proposed (6) so that we can implement MAC which can take an absolute
pathname of a requested file into account. We embedded DAC's code copied from
VFS helper functions into (6). But we received a comment that copying DAC's
code is not a good thing. Thus, we once gave up doing DAC checks before MAC
checks.

But, we do want to do DAC checks before MAC checks so that we can avoid MAC's
noisy error messages generated by access requests which will be rejected by
DAC.

There are two restrictions for now.

One is that we should avoid accessing "struct vfsmount" inside VFS helper
functions and security_inode_*() so that we can keep clear separation between
vfsmount-aware layer and vfsmount-unaware layer. Thus, there is no chance to
pass "struct vfsmount" parameter to VFS helper functions and
security_inode_*().

The other is that we should avoid copying DAC's code into security_path_*().
The security_path_*() which was proposed in (6) allows us to implement MAC
which can take an absolute pathname of a requested file into account, but
keeps us away from doing DAC checks before MAC checks.

We were using security_path_*(), but we still want to do DAC checks before
MAC checks. Thus, we propose this patch which is a variant of (5) (which was
not explicitly rejected). This patch introduces
security_path_set(struct vfsmount *) and security_path_clear().
We want to use these hooks as follows.

(a) Let security_path_set() which are inserted before calling VFS helper
functions remember the absolute pathname of "struct vfsmount" and
store the result which are in the form of "char *" into private hash.

(b) Let security_inode_*() do MAC using the pathname stored by
security_path_set().

(c) Let security_path_clear() which are inserted after calling VFS helper
functions clear the pathname from private hash.

This approach is similar to (5), but to avoid performance problem,
this patch inserts into minimal locations that TOMOYO Linux needs.

Signed-off-by: Kentaro Takeda <[email protected]>
Signed-off-by: Tetsuo Handa <[email protected]>
Signed-off-by: Toshiharu Harada <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Crispin Cowan <[email protected]>
Cc: Stephen Smalley <[email protected]>
Cc: Casey Schaufler <[email protected]>
Cc: James Morris <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

fs/namei.c | 43 +++++++++++++++++++++++++++++++++++++++++++
fs/open.c | 6 ++++++
include/linux/security.h | 24 ++++++++++++++++++++++++
net/unix/af_unix.c | 5 +++++
security/Kconfig | 10 ++++++++++
security/capability.c | 17 +++++++++++++++++
security/security.c | 16 ++++++++++++++++
7 files changed, 121 insertions(+)

--- linux-2.6.28-rc7-mm1.orig/fs/namei.c
+++ linux-2.6.28-rc7-mm1/fs/namei.c
@@ -1556,12 +1556,15 @@ int may_open(struct nameidata *nd, int a
* Refuse to truncate files with mandatory locks held on them.
*/
error = locks_verify_locked(inode);
+ if (!error)
+ error = security_path_set(nd->path.mnt);
if (!error) {
DQUOT_INIT(inode);

error = do_truncate(dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
NULL);
+ security_path_clear();
}
put_write_access(inode);
if (error)
@@ -1586,7 +1589,12 @@ static int __open_namei_create(struct na

if (!IS_POSIXACL(dir->d_inode))
mode &= ~current->fs->umask;
+ error = security_path_set(nd->path.mnt);
+ if (error)
+ goto out_unlock;
error = vfs_create(dir->d_inode, path->dentry, mode, nd);
+ security_path_clear();
+out_unlock:
mutex_unlock(&dir->d_inode->i_mutex);
dput(nd->path.dentry);
nd->path.dentry = path->dentry;
@@ -1999,6 +2007,9 @@ asmlinkage long sys_mknodat(int dfd, con
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
+ error = security_path_set(nd.path.mnt);
+ if (error)
+ goto out_drop_write;
switch (mode & S_IFMT) {
case 0: case S_IFREG:
error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
@@ -2011,6 +2022,8 @@ asmlinkage long sys_mknodat(int dfd, con
error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
break;
}
+ security_path_clear();
+out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
@@ -2070,7 +2083,12 @@ asmlinkage long sys_mkdirat(int dfd, con
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
+ error = security_path_set(nd.path.mnt);
+ if (error)
+ goto out_drop_write;
error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+ security_path_clear();
+out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
@@ -2180,7 +2198,12 @@ static long do_rmdir(int dfd, const char
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit3;
+ error = security_path_set(nd.path.mnt);
+ if (error)
+ goto exit4;
error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+ security_path_clear();
+exit4:
mnt_drop_write(nd.path.mnt);
exit3:
dput(dentry);
@@ -2265,7 +2288,12 @@ static long do_unlinkat(int dfd, const c
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit2;
+ error = security_path_set(nd.path.mnt);
+ if (error)
+ goto exit3;
error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+ security_path_clear();
+exit3:
mnt_drop_write(nd.path.mnt);
exit2:
dput(dentry);
@@ -2346,7 +2374,12 @@ asmlinkage long sys_symlinkat(const char
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
+ error = security_path_set(nd.path.mnt);
+ if (error)
+ goto out_drop_write;
error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
+ security_path_clear();
+out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
dput(dentry);
@@ -2443,7 +2476,12 @@ asmlinkage long sys_linkat(int olddfd, c
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
+ error = security_path_set(nd.path.mnt);
+ if (error)
+ goto out_drop_write;
error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
+ security_path_clear();
+out_drop_write:
mnt_drop_write(nd.path.mnt);
out_dput:
dput(new_dentry);
@@ -2677,8 +2715,13 @@ asmlinkage long sys_renameat(int olddfd,
error = mnt_want_write(oldnd.path.mnt);
if (error)
goto exit5;
+ error = security_path_set(oldnd.path.mnt);
+ if (error)
+ goto exit6;
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
+ security_path_clear();
+exit6:
mnt_drop_write(oldnd.path.mnt);
exit5:
dput(new_dentry);
--- linux-2.6.28-rc7-mm1.orig/fs/open.c
+++ linux-2.6.28-rc7-mm1/fs/open.c
@@ -272,9 +272,12 @@ static long do_sys_truncate(const char _
goto put_write_and_out;

error = locks_verify_truncate(inode, NULL, length);
+ if (!error)
+ error = security_path_set(path.mnt);
if (!error) {
DQUOT_INIT(inode);
error = do_truncate(path.dentry, length, 0, NULL);
+ security_path_clear();
}

put_write_and_out:
@@ -329,7 +332,10 @@ static long do_sys_ftruncate(unsigned in

error = locks_verify_truncate(inode, file, length);
if (!error)
+ error = security_path_set(file->f_path.mnt);
+ if (!error)
error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+ security_path_clear();
out_putf:
fput(file);
out:
--- linux-2.6.28-rc7-mm1.orig/include/linux/security.h
+++ linux-2.6.28-rc7-mm1/include/linux/security.h
@@ -470,6 +470,12 @@ static inline void security_free_mnt_opt
* @inode contains a pointer to the inode.
* @secid contains a pointer to the location where result will be saved.
* In case of failure, @secid will be set to zero.
+ * @path_set:
+ * Calculate pathname of vfsmount for subsequent vfs operation.
+ * @vfsmnt contains the vfsmount structure.
+ * Return 0 on success, negative value otherwise.
+ * @path_clear:
+ * Clear pathname of vfsmount calculated by @path_set.
*
* Security hooks for file operations
*
@@ -1331,6 +1337,11 @@ struct security_operations {
struct super_block *newsb);
int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);

+#ifdef CONFIG_SECURITY_PATH
+ int (*path_set) (struct vfsmount *vfsmnt);
+ void (*path_clear) (void);
+#endif
+
int (*inode_alloc_security) (struct inode *inode);
void (*inode_free_security) (struct inode *inode);
int (*inode_init_security) (struct inode *inode, struct inode *dir,
@@ -2705,6 +2716,19 @@ static inline void security_skb_classify

#endif /* CONFIG_SECURITY_NETWORK_XFRM */

+#ifdef CONFIG_SECURITY_PATH
+int security_path_set(struct vfsmount *vfsmnt);
+void security_path_clear(void);
+#else /* CONFIG_SECURITY_PATH */
+static inline int security_path_set(struct vfsmount *vfsmnt)
+{
+ return 0;
+}
+static inline void security_path_clear(void)
+{
+}
+#endif /* CONFIG_SECURITY_PATH */
+
#ifdef CONFIG_KEYS
#ifdef CONFIG_SECURITY

--- linux-2.6.28-rc7-mm1.orig/net/unix/af_unix.c
+++ linux-2.6.28-rc7-mm1/net/unix/af_unix.c
@@ -836,7 +836,12 @@ static int unix_bind(struct socket *sock
err = mnt_want_write(nd.path.mnt);
if (err)
goto out_mknod_dput;
+ err = security_path_set(nd.path.mnt);
+ if (err)
+ goto out_mknod_drop_write;
err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+ security_path_clear();
+out_mknod_drop_write:
mnt_drop_write(nd.path.mnt);
if (err)
goto out_mknod_dput;
--- linux-2.6.28-rc7-mm1.orig/security/Kconfig
+++ linux-2.6.28-rc7-mm1/security/Kconfig
@@ -81,6 +81,16 @@ config SECURITY_NETWORK_XFRM
IPSec.
If you are unsure how to answer this question, answer N.

+config SECURITY_PATH
+ bool "Security hooks for pathname based access control"
+ depends on SECURITY
+ help
+ This adds security_path_set() and security_path_clear()
+ hooks for pathname based access control.
+ If enabled, a security module can use these hooks to
+ implement pathname based access controls.
+ If you are unsure how to answer this question, answer N.
+
config SECURITY_FILE_CAPABILITIES
bool "File POSIX Capabilities"
default n
--- linux-2.6.28-rc7-mm1.orig/security/capability.c
+++ linux-2.6.28-rc7-mm1/security/capability.c
@@ -263,6 +263,19 @@ static void cap_inode_getsecid(const str
*secid = 0;
}

+#ifdef CONFIG_SECURITY_PATH
+
+static int cap_path_set(struct vfsmount *vfsmnt)
+{
+ return 0;
+}
+
+static void cap_path_clear(void)
+{
+}
+
+#endif
+
static int cap_file_permission(struct file *file, int mask)
{
return 0;
@@ -883,6 +896,10 @@ void security_fixup_ops(struct security_
set_to_cap_if_null(ops, inode_setsecurity);
set_to_cap_if_null(ops, inode_listsecurity);
set_to_cap_if_null(ops, inode_getsecid);
+#ifdef CONFIG_SECURITY_PATH
+ set_to_cap_if_null(ops, path_set);
+ set_to_cap_if_null(ops, path_clear);
+#endif
set_to_cap_if_null(ops, file_permission);
set_to_cap_if_null(ops, file_alloc_security);
set_to_cap_if_null(ops, file_free_security);
--- linux-2.6.28-rc7-mm1.orig/security/security.c
+++ linux-2.6.28-rc7-mm1/security/security.c
@@ -355,6 +355,22 @@ int security_inode_init_security(struct
}
EXPORT_SYMBOL(security_inode_init_security);

+#ifdef CONFIG_SECURITY_PATH
+
+int security_path_set(struct vfsmount *vfsmnt)
+{
+ return security_ops->path_set(vfsmnt);
+}
+EXPORT_SYMBOL(security_path_set);
+
+void security_path_clear(void)
+{
+ return security_ops->path_clear();
+}
+EXPORT_SYMBOL(security_path_clear);
+
+#endif
+
int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
{
if (unlikely(IS_PRIVATE(dir)))

2008-12-03 14:16:22

by Stephen Smalley

[permalink] [raw]
Subject: Re: [PATCH (mmotm-2008-12-02-17-08)] Introduce security_path_set/clear() hooks.

On Wed, 2008-12-03 at 17:56 +0900, Kentaro Takeda wrote:
> Stephen, Serge,
> Here is the patch for introducing new security_path_set()/clear() hooks.
>
> This patch enables LSM module to remember vfsmount's pathname so that it can
> calculate absolute pathname in security_inode_*(). Since actual MAC can be
> performed after DAC, there will not be any noise in auditing and learning
> features. This patch currently assumes that the vfsmount's pathname is stored in
> hash table in LSM module. (Should I use stack memory?)
>
> Since security_inode_*() are not always called after security_path_set(),
> security_path_clear() hook is needed to free the remembered pathname.

Your security_path_set()/security_path_clear() pairs look rather similar
to mnt_want_write()/mnt_drop_write() pairs. What if you were to call
your hooks from those functions, and then you would only need to add
further hook calls in the case of read-only and execute/search checks?

--
Stephen Smalley
National Security Agency

2008-12-04 12:00:37

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [PATCH (mmotm-2008-12-02-17-08)] Introduce security_path_set/clear() hooks.

Hello.

Stephen Smalley wrote:
> On Wed, 2008-12-03 at 17:56 +0900, Kentaro Takeda wrote:
> > Stephen, Serge,
> > Here is the patch for introducing new security_path_set()/clear() hooks.
> >
> > This patch enables LSM module to remember vfsmount's pathname so that it can
> > calculate absolute pathname in security_inode_*(). Since actual MAC can be
> > performed after DAC, there will not be any noise in auditing and learning
> > features. This patch currently assumes that the vfsmount's pathname is stored in
> > hash table in LSM module. (Should I use stack memory?)
> >
> > Since security_inode_*() are not always called after security_path_set(),
> > security_path_clear() hook is needed to free the remembered pathname.
>
> Your security_path_set()/security_path_clear() pairs look rather similar
> to mnt_want_write()/mnt_drop_write() pairs. What if you were to call
> your hooks from those functions, and then you would only need to add
> further hook calls in the case of read-only and execute/search checks?

Right. Locations of inserting security_path_set()/security_path_clear() pairs
are subset of mnt_want_write()/mnt_drop_write() pairs. Thus, we can insert
security_path_set()/security_path_clear() pairs into
mnt_want_write()/mnt_drop_write() pairs, if we can tolerate performance
regression. According to our rough measurement, there is about 8 - 22% of
performance regression. But this approach needs minimum modification to the
existing kernel (only two hooks to be inserted).

The attached patch embeds security_path_set()/security_path_clear() into
mnt_want_write()/mnt_drop_write() and adds an example LSM module which
calculates vfsmount's pathname.
If LSM and FS people can accept this approach, we want to use it.

(----- When below patch is enabled -----)
# time dd status=noxfer if=/dev/zero of=/tmp/file bs=1 count=10485760
10485760+0 records in
10485760+0 records out

real 0m32.139s
user 0m2.303s
sys 0m29.756s
# time dd status=noxfer if=/dev/zero of=/tmp/file bs=512 count=20480
20480+0 records in
20480+0 records out

real 0m0.087s
user 0m0.002s
sys 0m0.085s
# time dd status=noxfer if=/dev/zero of=/tmp/file bs=4096 count=2560
2560+0 records in
2560+0 records out

real 0m0.028s
user 0m0.001s
sys 0m0.027s

(----- When below patch is disbled -----)
# time dd status=noxfer if=/dev/zero of=/tmp/file bs=1 count=10485760
10485760+0 records in
10485760+0 records out

real 0m26.776s
user 0m2.281s
sys 0m24.373s
# time dd status=noxfer if=/dev/zero of=/tmp/file bs=512 count=20480
20480+0 records in
20480+0 records out

real 0m0.077s
user 0m0.002s
sys 0m0.073s
# time dd status=noxfer if=/dev/zero of=/tmp/file bs=4096 count=2560
2560+0 records in
2560+0 records out

real 0m0.025s
user 0m0.001s
sys 0m0.024s


Regards.

--------------------
Subject: Embed security_path_set()/security_path_clear() into mnt_want_write()/mnt_drop_write().

This is a LSM version of http://lkml.org/lkml/2008/8/19/16 .

Signed-off-by: Kentaro Takeda <[email protected]>
Signed-off-by: Tetsuo Handa <[email protected]>
Signed-off-by: Toshiharu Harada <[email protected]>
---

fs/namespace.c | 11 +++++
include/linux/security.h | 24 +++++++++++
security/Kconfig | 10 ++++
security/Makefile | 1
security/capability.c | 17 +++++++
security/mnt_path.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++
security/security.c | 14 ++++++
7 files changed, 177 insertions(+)

--- linux-2.6.28-rc7-mm1.orig/fs/namespace.c
+++ linux-2.6.28-rc7-mm1/fs/namespace.c
@@ -254,6 +254,10 @@ int mnt_want_write(struct vfsmount *mnt)
int ret = 0;
struct mnt_writer *cpu_writer;

+#ifdef CONFIG_SECURITY_PATH
+ if (security_path_set(mnt) < 0)
+ return -ENOMEM;
+#endif
cpu_writer = &get_cpu_var(mnt_writers);
spin_lock(&cpu_writer->lock);
if (__mnt_is_readonly(mnt)) {
@@ -265,6 +269,10 @@ int mnt_want_write(struct vfsmount *mnt)
out:
spin_unlock(&cpu_writer->lock);
put_cpu_var(mnt_writers);
+#ifdef CONFIG_SECURITY_PATH
+ if (ret)
+ security_path_clear();
+#endif
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -362,6 +370,9 @@ void mnt_drop_write(struct vfsmount *mnt
* we could theoretically wrap __mnt_writers.
*/
put_cpu_var(mnt_writers);
+#ifdef CONFIG_SECURITY_PATH
+ security_path_clear();
+#endif
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

--- linux-2.6.28-rc7-mm1.orig/include/linux/security.h
+++ linux-2.6.28-rc7-mm1/include/linux/security.h
@@ -470,6 +470,12 @@ static inline void security_free_mnt_opt
* @inode contains a pointer to the inode.
* @secid contains a pointer to the location where result will be saved.
* In case of failure, @secid will be set to zero.
+ * @path_set:
+ * Calculate pathname of vfsmount for subsequent vfs operation.
+ * @vfsmnt contains the vfsmount structure.
+ * Return 0 on success, negative value otherwise.
+ * @path_clear:
+ * Clear pathname of vfsmount calculated by @path_set.
*
* Security hooks for file operations
*
@@ -1331,6 +1337,11 @@ struct security_operations {
struct super_block *newsb);
int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);

+#ifdef CONFIG_SECURITY_PATH
+ int (*path_set) (struct vfsmount *vfsmnt);
+ void (*path_clear) (void);
+#endif
+
int (*inode_alloc_security) (struct inode *inode);
void (*inode_free_security) (struct inode *inode);
int (*inode_init_security) (struct inode *inode, struct inode *dir,
@@ -2705,6 +2716,19 @@ static inline void security_skb_classify

#endif /* CONFIG_SECURITY_NETWORK_XFRM */

+#ifdef CONFIG_SECURITY_PATH
+int security_path_set(struct vfsmount *vfsmnt);
+void security_path_clear(void);
+#else /* CONFIG_SECURITY_PATH */
+static inline int security_path_set(struct vfsmount *vfsmnt)
+{
+ return 0;
+}
+static inline void security_path_clear(void)
+{
+}
+#endif /* CONFIG_SECURITY_PATH */
+
#ifdef CONFIG_KEYS
#ifdef CONFIG_SECURITY

--- linux-2.6.28-rc7-mm1.orig/security/Kconfig
+++ linux-2.6.28-rc7-mm1/security/Kconfig
@@ -81,6 +81,16 @@ config SECURITY_NETWORK_XFRM
IPSec.
If you are unsure how to answer this question, answer N.

+config SECURITY_PATH
+ bool "Security hooks for pathname based access control"
+ depends on SECURITY
+ help
+ This adds security_path_set() and security_path_clear()
+ hooks for pathname based access control.
+ If enabled, a security module can use these hooks to
+ implement pathname based access controls.
+ If you are unsure how to answer this question, answer N.
+
config SECURITY_FILE_CAPABILITIES
bool "File POSIX Capabilities"
default n
--- linux-2.6.28-rc7-mm1.orig/security/Makefile
+++ linux-2.6.28-rc7-mm1/security/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_SECURITY_SELINUX) += selin
obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o
obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o
obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o
+obj-$(CONFIG_SECURITY_PATH) += mnt_path.o
--- linux-2.6.28-rc7-mm1.orig/security/capability.c
+++ linux-2.6.28-rc7-mm1/security/capability.c
@@ -263,6 +263,19 @@ static void cap_inode_getsecid(const str
*secid = 0;
}

+#ifdef CONFIG_SECURITY_PATH
+
+static int cap_path_set(struct vfsmount *vfsmnt)
+{
+ return 0;
+}
+
+static void cap_path_clear(void)
+{
+}
+
+#endif
+
static int cap_file_permission(struct file *file, int mask)
{
return 0;
@@ -883,6 +896,10 @@ void security_fixup_ops(struct security_
set_to_cap_if_null(ops, inode_setsecurity);
set_to_cap_if_null(ops, inode_listsecurity);
set_to_cap_if_null(ops, inode_getsecid);
+#ifdef CONFIG_SECURITY_PATH
+ set_to_cap_if_null(ops, path_set);
+ set_to_cap_if_null(ops, path_clear);
+#endif
set_to_cap_if_null(ops, file_permission);
set_to_cap_if_null(ops, file_alloc_security);
set_to_cap_if_null(ops, file_free_security);
--- /dev/null
+++ linux-2.6.28-rc7-mm1/security/mnt_path.c
@@ -0,0 +1,100 @@
+/* mnt_path tracker */
+#include <linux/security.h>
+#include <linux/mount.h>
+
+/**
+ * mp_update_mnt_path - Update list of pathname of vfsmount.
+ *
+ * @mnt_path: Pointer to "const char *" or NULL.
+ *
+ * Returns @mnt_path on success, NULL otherwise if @mnt_path != NULL.
+ * Returns previously saved "const char *" and clears it if @mnt_path == NULL.
+ */
+static const char *mp_update_mnt_path(const char *mnt_path)
+{
+ struct mnt_path_entry {
+ struct list_head list;
+ struct task_struct *task; /* = current */
+ const char *mnt_path;
+ };
+ static LIST_HEAD(list);
+ static DEFINE_SPINLOCK(lock);
+ struct task_struct *task = current;
+ struct mnt_path_entry *entry;
+ if (!mnt_path) {
+ if (!list_empty(&list)) {
+ struct mnt_path_entry *p;
+ entry = NULL;
+ /***** CRITICAL SECTION START *****/
+ spin_lock(&lock);
+ list_for_each_entry(p, &list, list) {
+ if (p->task != task)
+ continue;
+ list_del(&p->list);
+ entry = p;
+ break;
+ }
+ spin_unlock(&lock);
+ /***** CRITICAL SECTION END *****/
+ if (entry) {
+ mnt_path = entry->mnt_path;
+ kfree(entry);
+ }
+ }
+ return mnt_path;
+ }
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+ entry->task = task;
+ entry->mnt_path = mnt_path;
+ /***** CRITICAL SECTION START *****/
+ spin_lock(&lock);
+ list_add(&entry->list, &list);
+ spin_unlock(&lock);
+ /***** CRITICAL SECTION END *****/
+ return mnt_path;
+}
+
+static void mp_path_clear(void)
+{
+ kfree(mp_update_mnt_path(NULL));
+}
+
+static int mp_path_set(struct vfsmount *vfsmnt)
+{
+ char *sp;
+ struct path path = { vfsmnt, vfsmnt->mnt_root };
+ char *mnt_path = kmalloc(PATH_MAX, GFP_KERNEL);
+ mp_path_clear();
+ if (!mnt_path)
+ return -ENOMEM;
+ sp = d_path(&path, mnt_path, PATH_MAX - 1);
+ if (IS_ERR(sp)) {
+ kfree(mnt_path);
+ return -ENOMEM;
+ }
+ sp = kstrdup(sp, GFP_KERNEL);
+ kfree(mnt_path);
+ if (!sp)
+ return -ENOMEM;
+ return mp_update_mnt_path(sp) ? 0 : -ENOMEM;
+}
+
+static struct security_operations mp_security_ops = {
+ .name = "mnt_path",
+ .path_set = mp_path_set,
+ .path_clear = mp_path_clear,
+};
+
+static int __init mp_init(void)
+{
+ if (!security_module_enable(&mp_security_ops))
+ return 0;
+ if (register_security(&mp_security_ops))
+ panic("Failure registering mnt_path tracker");
+ printk(KERN_INFO "mnt_path tracker enabled.\n");
+ return 0;
+}
+
+security_initcall(mp_init);
--- linux-2.6.28-rc7-mm1.orig/security/security.c
+++ linux-2.6.28-rc7-mm1/security/security.c
@@ -355,6 +355,20 @@ int security_inode_init_security(struct
}
EXPORT_SYMBOL(security_inode_init_security);

+#ifdef CONFIG_SECURITY_PATH
+
+int security_path_set(struct vfsmount *vfsmnt)
+{
+ return security_ops->path_set(vfsmnt);
+}
+
+void security_path_clear(void)
+{
+ return security_ops->path_clear();
+}
+
+#endif
+
int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
{
if (unlikely(IS_PRIVATE(dir)))

2008-12-04 18:20:34

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [PATCH (mmotm-2008-12-02-17-08)] Introduce security_path_set/clear() hooks.

Quoting Tetsuo Handa ([email protected]):
> Hello.
>
> Stephen Smalley wrote:
> > On Wed, 2008-12-03 at 17:56 +0900, Kentaro Takeda wrote:
> > > Stephen, Serge,
> > > Here is the patch for introducing new security_path_set()/clear() hooks.
> > >
> > > This patch enables LSM module to remember vfsmount's pathname so that it can
> > > calculate absolute pathname in security_inode_*(). Since actual MAC can be
> > > performed after DAC, there will not be any noise in auditing and learning
> > > features. This patch currently assumes that the vfsmount's pathname is stored in
> > > hash table in LSM module. (Should I use stack memory?)
> > >
> > > Since security_inode_*() are not always called after security_path_set(),
> > > security_path_clear() hook is needed to free the remembered pathname.
> >
> > Your security_path_set()/security_path_clear() pairs look rather similar
> > to mnt_want_write()/mnt_drop_write() pairs. What if you were to call
> > your hooks from those functions, and then you would only need to add
> > further hook calls in the case of read-only and execute/search checks?
>
> Right. Locations of inserting security_path_set()/security_path_clear() pairs
> are subset of mnt_want_write()/mnt_drop_write() pairs. Thus, we can insert
> security_path_set()/security_path_clear() pairs into
> mnt_want_write()/mnt_drop_write() pairs, if we can tolerate performance
> regression. According to our rough measurement, there is about 8 - 22% of
> performance regression.

... compared to what, exactly?

If having CONFIG_SECURITY_PATH=y but TOMOYO disabled has this kind of
regression against just not having CONFIG_SECURITY_PATH, then no that is
not acceptable.

-serge

2008-12-04 21:41:48

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [PATCH (mmotm-2008-12-02-17-08)] Introducesecurity_path_set/clear() hooks.

Hello.

Serge E. Hallyn wrote:
> > Right. Locations of inserting security_path_set()/security_path_clear() pairs
> > are subset of mnt_want_write()/mnt_drop_write() pairs. Thus, we can insert
> > security_path_set()/security_path_clear() pairs into
> > mnt_want_write()/mnt_drop_write() pairs, if we can tolerate performance
> > regression. According to our rough measurement, there is about 8 - 22% of
> > performance regression.
>
> ... compared to what, exactly?
>
> If having CONFIG_SECURITY_PATH=y but TOMOYO disabled has this kind of
> regression against just not having CONFIG_SECURITY_PATH, then no that is
> not acceptable.
>
Comparison between a module using mnt_path.c and a module not using mnt_path.c .
If mp_update_mnt_path() is not called, there is no performance regression.
TOMOYO will need mp_update_mnt_path().

Regards.

2008-12-05 21:56:12

by Stephen Smalley

[permalink] [raw]
Subject: Re: [PATCH (mmotm-2008-12-02-17-08)] Introduce security_path_set/clear() hooks.

On Thu, 2008-12-04 at 21:00 +0900, Tetsuo Handa wrote:
> Hello.
>
> Stephen Smalley wrote:
> > On Wed, 2008-12-03 at 17:56 +0900, Kentaro Takeda wrote:
> > > Stephen, Serge,
> > > Here is the patch for introducing new security_path_set()/clear() hooks.
> > >
> > > This patch enables LSM module to remember vfsmount's pathname so that it can
> > > calculate absolute pathname in security_inode_*(). Since actual MAC can be
> > > performed after DAC, there will not be any noise in auditing and learning
> > > features. This patch currently assumes that the vfsmount's pathname is stored in
> > > hash table in LSM module. (Should I use stack memory?)
> > >
> > > Since security_inode_*() are not always called after security_path_set(),
> > > security_path_clear() hook is needed to free the remembered pathname.
> >
> > Your security_path_set()/security_path_clear() pairs look rather similar
> > to mnt_want_write()/mnt_drop_write() pairs. What if you were to call
> > your hooks from those functions, and then you would only need to add
> > further hook calls in the case of read-only and execute/search checks?
>
> Right. Locations of inserting security_path_set()/security_path_clear() pairs
> are subset of mnt_want_write()/mnt_drop_write() pairs. Thus, we can insert
> security_path_set()/security_path_clear() pairs into
> mnt_want_write()/mnt_drop_write() pairs, if we can tolerate performance
> regression. According to our rough measurement, there is about 8 - 22% of
> performance regression. But this approach needs minimum modification to the
> existing kernel (only two hooks to be inserted).

I assume you also need separate hooks to cover the read-only open case?
As for your performance, your implementation of mp_* is clearly
non-optimal, so I'd expect there is plenty of room for improvement
there.

<snip>
> --- linux-2.6.28-rc7-mm1.orig/fs/namespace.c
> +++ linux-2.6.28-rc7-mm1/fs/namespace.c
> @@ -254,6 +254,10 @@ int mnt_want_write(struct vfsmount *mnt)
> int ret = 0;
> struct mnt_writer *cpu_writer;
>
> +#ifdef CONFIG_SECURITY_PATH
> + if (security_path_set(mnt) < 0)
> + return -ENOMEM;
> +#endif

No #ifdef's within the functions, of course. That gets handled by
security.h.

--
Stephen Smalley
National Security Agency

2008-12-05 23:28:07

by Tetsuo Handa

[permalink] [raw]
Subject: Re: [PATCH (mmotm-2008-12-02-17-08)] Introduce security_path_set/clear() hooks.

Hello.

Stephen Smalley wrote:
> > Right. Locations of inserting security_path_set()/security_path_clear() pairs
> > are subset of mnt_want_write()/mnt_drop_write() pairs. Thus, we can insert
> > security_path_set()/security_path_clear() pairs into
> > mnt_want_write()/mnt_drop_write() pairs, if we can tolerate performance
> > regression. According to our rough measurement, there is about 8 - 22% of
> > performance regression. But this approach needs minimum modification to the
> > existing kernel (only two hooks to be inserted).
>
> I assume you also need separate hooks to cover the read-only open case?

security_dentry_open() receives "struct file *", so I think we don't need
separate hooks for open(O_RDONLY).

> As for your performance, your implementation of mp_* is clearly
> non-optimal, so I'd expect there is plenty of room for improvement
> there.

Yes. Thus, I want to pass a caller identifier to mnt_want_write() so that
we can skip calculating vfsmount's pathname when it is not interested for
a LSM module (e.g. mnt_want_write() called for updating atime/ctime/mtime
checks).
May I add "int caller_id" to mnt_want_write()?

> No #ifdef's within the functions, of course. That gets handled by
> security.h.
OK.

Regards.

2008-12-06 05:25:35

by Tetsuo Handa

[permalink] [raw]
Subject: [RFC] Add "reason" parameter to mnt_want_write().

We want to allow LSM modules to perform MAC which takes an absolute pathname of
a requested file into account. Since we can't pass "struct vfsmount" to VFS
helper functions, we are trying to somehow pass "struct vfsmount"'s pathnames
instead of "struct vfsmount" itself.

The mnt_want_write() and mnt_drop_write() hooks are inserted around VFS helper
functions call. Thus, I think we can insert security_path_set() into
mnt_want_write() and secuity_path_clear() into mnt_drop_write() rather than
scattering security_path_set() and security_path_clear() all around the places.

But, mnt_want_write() and mnt_drop_write() are used for not only VFS helper
functions call but also various places. Thus, honestly calculating vfsmount's
pathnames for touch_atime()/file_update_time() operations triggers measurable
performance regression.

We want to skip calculating vfsmount's pathnames when the mnt_want_write()
call is not interested for the LSM modules.
This patch adds "reason" parameter to mnt_want_write() so that LSM modules can
calculate vfsmount's pathnames only when needed. The "reason" parameter is one
of constants listed below.

/* For subsequent VFS helper functions call. */
MNT_WANT_FOR_VFS_REQUEST, /* vfs_create()/vfs_mkdir() etc. */
/* For implicit write of inode's timestamps. */
MNT_WANT_FOR_UPDATE_ACMTIME, /* touch_atime()/file_update_time() */
/* For explicit write of inode's attributes and timestamps. */
MNT_WANT_FOR_UPDATE_ATTR, /* chmod()/chown()/utimes() etc. */
/* For filesystem's ioctl. */
MNT_WANT_FOR_FS_IOCTL, /* ext3_ioctl() etc. */
/* For opening a file for writing. */
MNT_WANT_FOR_OPEN_WRITE, /* __get_file_write_access() */
/* Needs to be granted since the caller doesn't check errors. */
MNT_WANT_ANYWAY /* init_file() */

Please review.

Signed-off-by: Tetsuo Handa <[email protected]>
---
fs/ext2/ioctl.c | 6 +++---
fs/ext3/ioctl.c | 10 +++++-----
fs/ext4/ioctl.c | 10 +++++-----
fs/fat/file.c | 2 +-
fs/file_table.c | 2 +-
fs/gfs2/ops_file.c | 2 +-
fs/hfsplus/ioctl.c | 2 +-
fs/inode.c | 4 ++--
fs/jfs/ioctl.c | 2 +-
fs/namei.c | 18 +++++++++---------
fs/namespace.c | 15 ++++++++++++++-
fs/ncpfs/ioctl.c | 2 +-
fs/nfsd/nfs4proc.c | 3 ++-
fs/nfsd/nfs4recover.c | 6 +++---
fs/nfsd/nfs4state.c | 3 ++-
fs/nfsd/vfs.c | 21 ++++++++++++++-------
fs/ocfs2/ioctl.c | 3 ++-
fs/open.c | 16 ++++++++--------
fs/reiserfs/ioctl.c | 5 +++--
fs/ubifs/ioctl.c | 2 +-
fs/utimes.c | 2 +-
fs/xattr.c | 12 ++++++------
fs/xfs/linux-2.6/xfs_ioctl.c | 6 ++++--
fs/xfs/linux-2.6/xfs_lrw.c | 3 ++-
include/linux/mount.h | 17 ++++++++++++++++-
ipc/mqueue.c | 4 ++--
net/unix/af_unix.c | 2 +-
27 files changed, 111 insertions(+), 69 deletions(-)

--- linux-2.6.28-rc7-mm1.orig/fs/ext2/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/ext2/ioctl.c
@@ -36,7 +36,7 @@ long ext2_ioctl(struct file *filp, unsig
case EXT2_IOC_SETFLAGS: {
unsigned int oldflags;

- ret = mnt_want_write(filp->f_path.mnt);
+ ret = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (ret)
return ret;

@@ -93,7 +93,7 @@ setflags_out:
case EXT2_IOC_SETVERSION:
if (!is_owner_or_cap(inode))
return -EPERM;
- ret = mnt_want_write(filp->f_path.mnt);
+ ret = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (ret)
return ret;
if (get_user(inode->i_generation, (int __user *) arg)) {
@@ -123,7 +123,7 @@ setflags_out:
if (get_user(rsv_window_size, (int __user *)arg))
return -EFAULT;

- ret = mnt_want_write(filp->f_path.mnt);
+ ret = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (ret)
return ret;

--- linux-2.6.28-rc7-mm1.orig/fs/ext3/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/ext3/ioctl.c
@@ -39,7 +39,7 @@ int ext3_ioctl (struct inode * inode, st
unsigned int oldflags;
unsigned int jflag;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

@@ -141,7 +141,7 @@ flags_out:

if (!is_owner_or_cap(inode))
return -EPERM;
- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;
if (get_user(generation, (int __user *) arg)) {
@@ -202,7 +202,7 @@ setversion_out:
if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
return -ENOTTY;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

@@ -244,7 +244,7 @@ setrsvsz_out:
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

@@ -270,7 +270,7 @@ group_extend_out:
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

--- linux-2.6.28-rc7-mm1.orig/fs/ext4/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/ext4/ioctl.c
@@ -44,7 +44,7 @@ long ext4_ioctl(struct file *filp, unsig
if (get_user(flags, (int __user *) arg))
return -EFAULT;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

@@ -141,7 +141,7 @@ flags_out:
if (!is_owner_or_cap(inode))
return -EPERM;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;
if (get_user(generation, (int __user *) arg)) {
@@ -200,7 +200,7 @@ setversion_out:
if (get_user(n_blocks_count, (__u32 __user *)arg))
return -EFAULT;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

@@ -226,7 +226,7 @@ setversion_out:
sizeof(input)))
return -EFAULT;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

@@ -247,7 +247,7 @@ setversion_out:
if (!is_owner_or_cap(inode))
return -EACCES;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;
/*
--- linux-2.6.28-rc7-mm1.orig/fs/fat/file.c
+++ linux-2.6.28-rc7-mm1/fs/fat/file.c
@@ -47,7 +47,7 @@ int fat_generic_ioctl(struct inode *inod

mutex_lock(&inode->i_mutex);

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
goto up_no_drop_write;

--- linux-2.6.28-rc7-mm1.orig/fs/file_table.c
+++ linux-2.6.28-rc7-mm1/fs/file_table.c
@@ -210,7 +210,7 @@ int init_file(struct file *file, struct
*/
if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
file_take_write(file);
- error = mnt_want_write(mnt);
+ error = mnt_want_write(mnt, MNT_WANT_ANYWAY);
WARN_ON(error);
}
return error;
--- linux-2.6.28-rc7-mm1.orig/fs/gfs2/ops_file.c
+++ linux-2.6.28-rc7-mm1/fs/gfs2/ops_file.c
@@ -211,7 +211,7 @@ static int do_gfs2_set_flags(struct file
int error;
u32 new_flags, flags;

- error = mnt_want_write(filp->f_path.mnt);
+ error = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (error)
return error;

--- linux-2.6.28-rc7-mm1.orig/fs/hfsplus/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/hfsplus/ioctl.c
@@ -37,7 +37,7 @@ int hfsplus_ioctl(struct inode *inode, s
return put_user(flags, (int __user *)arg);
case HFSPLUS_IOC_EXT2_SETFLAGS: {
int err = 0;
- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

--- linux-2.6.28-rc7-mm1.orig/fs/inode.c
+++ linux-2.6.28-rc7-mm1/fs/inode.c
@@ -1235,7 +1235,7 @@ void touch_atime(struct vfsmount *mnt, s
struct inode *inode = dentry->d_inode;
struct timespec now;

- if (mnt_want_write(mnt))
+ if (mnt_want_write(mnt, MNT_WANT_FOR_UPDATE_ACMTIME))
return;
if (inode->i_flags & S_NOATIME)
goto out;
@@ -1291,7 +1291,7 @@ void file_update_time(struct file *file)
if (IS_NOCMTIME(inode))
return;

- err = mnt_want_write(file->f_path.mnt);
+ err = mnt_want_write(file->f_path.mnt, MNT_WANT_FOR_UPDATE_ACMTIME);
if (err)
return;

--- linux-2.6.28-rc7-mm1.orig/fs/jfs/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/jfs/ioctl.c
@@ -68,7 +68,7 @@ long jfs_ioctl(struct file *filp, unsign
unsigned int oldflags;
int err;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

--- linux-2.6.28-rc7-mm1.orig/fs/namei.c
+++ linux-2.6.28-rc7-mm1/fs/namei.c
@@ -1723,7 +1723,7 @@ do_last:
* a permanent write count is taken through
* the 'struct file' in nameidata_to_filp().
*/
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto exit_mutex_unlock;
error = __open_namei_create(&nd, &path, flag, mode);
@@ -1775,7 +1775,7 @@ ok:
*/
will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
if (will_write) {
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto exit;
}
@@ -1996,7 +1996,7 @@ asmlinkage long sys_mknodat(int dfd, con
error = may_mknod(mode);
if (error)
goto out_dput;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto out_dput;
switch (mode & S_IFMT) {
@@ -2067,7 +2067,7 @@ asmlinkage long sys_mkdirat(int dfd, con

if (!IS_POSIXACL(nd.path.dentry->d_inode))
mode &= ~current->fs->umask;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto out_dput;
error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
@@ -2177,7 +2177,7 @@ static long do_rmdir(int dfd, const char
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit2;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto exit3;
error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
@@ -2262,7 +2262,7 @@ static long do_unlinkat(int dfd, const c
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto exit2;
error = vfs_unlink(nd.path.dentry->d_inode, dentry);
@@ -2343,7 +2343,7 @@ asmlinkage long sys_symlinkat(const char
if (IS_ERR(dentry))
goto out_unlock;

- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto out_dput;
error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
@@ -2440,7 +2440,7 @@ asmlinkage long sys_linkat(int olddfd, c
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto out_unlock;
- error = mnt_want_write(nd.path.mnt);
+ error = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto out_dput;
error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
@@ -2674,7 +2674,7 @@ asmlinkage long sys_renameat(int olddfd,
if (new_dentry == trap)
goto exit5;

- error = mnt_want_write(oldnd.path.mnt);
+ error = mnt_want_write(oldnd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
--- linux-2.6.28-rc7-mm1.orig/fs/namespace.c
+++ linux-2.6.28-rc7-mm1/fs/namespace.c
@@ -242,6 +242,7 @@ static inline void use_cpu_writer_for_mo
/**
* mnt_want_write - get write access to a mount
* @mnt: the mount on which to take a write
+ * @reason: reason for this call
*
* This tells the low-level filesystem that a write is
* about to be performed to it, and makes sure that
@@ -249,11 +250,16 @@ static inline void use_cpu_writer_for_mo
* the write operation is finished, mnt_drop_write()
* must be called. This is effectively a refcount.
*/
-int mnt_want_write(struct vfsmount *mnt)
+int mnt_want_write(struct vfsmount *mnt, const int reason)
{
int ret = 0;
struct mnt_writer *cpu_writer;

+ /*
+ ret = security_path_set(mnt, reason);
+ if (ret)
+ return ret;
+ */
cpu_writer = &get_cpu_var(mnt_writers);
spin_lock(&cpu_writer->lock);
if (__mnt_is_readonly(mnt)) {
@@ -265,6 +271,10 @@ int mnt_want_write(struct vfsmount *mnt)
out:
spin_unlock(&cpu_writer->lock);
put_cpu_var(mnt_writers);
+ /*
+ if (ret)
+ security_path_clear();
+ */
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -362,6 +372,9 @@ void mnt_drop_write(struct vfsmount *mnt
* we could theoretically wrap __mnt_writers.
*/
put_cpu_var(mnt_writers);
+ /*
+ security_path_clear();
+ */
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

--- linux-2.6.28-rc7-mm1.orig/fs/ncpfs/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/ncpfs/ioctl.c
@@ -861,7 +861,7 @@ int ncp_ioctl(struct inode *inode, struc
* -EACCESS, so it seems consistent to keep
* that here.
*/
- if (mnt_want_write(filp->f_path.mnt))
+ if (mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL))
return -EACCES;
}
ret = __ncp_ioctl(inode, filp, cmd, arg);
--- linux-2.6.28-rc7-mm1.orig/fs/nfsd/nfs4proc.c
+++ linux-2.6.28-rc7-mm1/fs/nfsd/nfs4proc.c
@@ -661,7 +661,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, st
return status;
}
}
- status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
+ status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (status)
return status;
status = nfs_ok;
--- linux-2.6.28-rc7-mm1.orig/fs/nfsd/nfs4recover.c
+++ linux-2.6.28-rc7-mm1/fs/nfsd/nfs4recover.c
@@ -162,7 +162,7 @@ nfsd4_create_clid_dir(struct nfs4_client
dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
goto out_put;
}
- status = mnt_want_write(rec_dir.mnt);
+ status = mnt_want_write(rec_dir.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (status)
goto out_put;
status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
@@ -326,7 +326,7 @@ nfsd4_remove_clid_dir(struct nfs4_client
if (!rec_dir_init || !clp->cl_firststate)
return;

- status = mnt_want_write(rec_dir.mnt);
+ status = mnt_want_write(rec_dir.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (status)
goto out;
clp->cl_firststate = 0;
@@ -369,7 +369,7 @@ nfsd4_recdir_purge_old(void) {

if (!rec_dir_init)
return;
- status = mnt_want_write(rec_dir.mnt);
+ status = mnt_want_write(rec_dir.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (status)
goto out;
status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
--- linux-2.6.28-rc7-mm1.orig/fs/nfsd/nfs4state.c
+++ linux-2.6.28-rc7-mm1/fs/nfsd/nfs4state.c
@@ -1587,7 +1587,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp
int err = get_write_access(inode);
if (err)
return nfserrno(err);
- err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
+ err = mnt_want_write(cur_fh->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (err)
return nfserrno(err);
file_take_write(filp);
--- linux-2.6.28-rc7-mm1.orig/fs/nfsd/vfs.c
+++ linux-2.6.28-rc7-mm1/fs/nfsd/vfs.c
@@ -1261,7 +1261,8 @@ nfsd_create(struct svc_rqst *rqstp, stru
goto out;
}

- host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ host_err = mnt_want_write(fhp->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (host_err)
goto out_nfserr;

@@ -1374,7 +1375,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, s
v_atime = verifier[1]&0x7fffffff;
}

- host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ host_err = mnt_want_write(fhp->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (host_err)
goto out_nfserr;
if (dchild->d_inode) {
@@ -1538,7 +1540,8 @@ nfsd_symlink(struct svc_rqst *rqstp, str
if (IS_ERR(dnew))
goto out_nfserr;

- host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ host_err = mnt_want_write(fhp->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (host_err)
goto out_nfserr;

@@ -1614,7 +1617,8 @@ nfsd_link(struct svc_rqst *rqstp, struct
dold = tfhp->fh_dentry;
dest = dold->d_inode;

- host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
+ host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (host_err) {
err = nfserrno(host_err);
goto out_dput;
@@ -1716,7 +1720,8 @@ nfsd_rename(struct svc_rqst *rqstp, stru
host_err = -EXDEV;
if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
goto out_dput_new;
- host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
+ host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (host_err)
goto out_dput_new;

@@ -1787,7 +1792,8 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
if (!type)
type = rdentry->d_inode->i_mode & S_IFMT;

- host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ host_err = mnt_want_write(fhp->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (host_err)
goto out_nfserr;

@@ -2188,7 +2194,8 @@ nfsd_set_posix_acl(struct svc_fh *fhp, i
} else
size = 0;

- error = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ error = mnt_want_write(fhp->fh_export->ex_path.mnt,
+ MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto getout;
if (size)
--- linux-2.6.28-rc7-mm1.orig/fs/ocfs2/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/ocfs2/ioctl.c
@@ -129,7 +129,8 @@ long ocfs2_ioctl(struct file *filp, unsi
if (get_user(flags, (int __user *) arg))
return -EFAULT;

- status = mnt_want_write(filp->f_path.mnt);
+ status = mnt_want_write(filp->f_path.mnt,
+ MNT_WANT_FOR_FS_IOCTL);
if (status)
return status;
status = ocfs2_set_inode_attr(inode, flags,
--- linux-2.6.28-rc7-mm1.orig/fs/open.c
+++ linux-2.6.28-rc7-mm1/fs/open.c
@@ -247,7 +247,7 @@ static long do_sys_truncate(const char _
if (!S_ISREG(inode->i_mode))
goto dput_and_out;

- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (error)
goto dput_and_out;

@@ -587,7 +587,7 @@ asmlinkage long sys_fchmod(unsigned int

audit_inode(NULL, dentry);

- err = mnt_want_write(file->f_path.mnt);
+ err = mnt_want_write(file->f_path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (err)
goto out_putf;
mutex_lock(&inode->i_mutex);
@@ -617,7 +617,7 @@ asmlinkage long sys_fchmodat(int dfd, co
goto out;
inode = path.dentry->d_inode;

- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (error)
goto dput_and_out;
mutex_lock(&inode->i_mutex);
@@ -672,7 +672,7 @@ asmlinkage long sys_chown(const char __u
error = user_path(filename, &path);
if (error)
goto out;
- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (error)
goto out_release;
error = chown_common(path.dentry, user, group);
@@ -697,7 +697,7 @@ asmlinkage long sys_fchownat(int dfd, co
error = user_path_at(dfd, filename, follow, &path);
if (error)
goto out;
- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (error)
goto out_release;
error = chown_common(path.dentry, user, group);
@@ -716,7 +716,7 @@ asmlinkage long sys_lchown(const char __
error = user_lpath(filename, &path);
if (error)
goto out;
- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (error)
goto out_release;
error = chown_common(path.dentry, user, group);
@@ -738,7 +738,7 @@ asmlinkage long sys_fchown(unsigned int
if (!file)
goto out;

- error = mnt_want_write(file->f_path.mnt);
+ error = mnt_want_write(file->f_path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (error)
goto out_fput;
dentry = file->f_path.dentry;
@@ -773,7 +773,7 @@ static inline int __get_file_write_acces
/*
* Balanced in __fput()
*/
- error = mnt_want_write(mnt);
+ error = mnt_want_write(mnt, MNT_WANT_FOR_OPEN_WRITE);
if (error)
put_write_access(inode);
}
--- linux-2.6.28-rc7-mm1.orig/fs/reiserfs/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/reiserfs/ioctl.c
@@ -48,7 +48,8 @@ int reiserfs_ioctl(struct inode *inode,
if (!reiserfs_attrs(inode->i_sb))
return -ENOTTY;

- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt,
+ MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;

@@ -97,7 +98,7 @@ setflags_out:
case REISERFS_IOC_SETVERSION:
if (!is_owner_or_cap(inode))
return -EPERM;
- err = mnt_want_write(filp->f_path.mnt);
+ err = mnt_want_write(filp->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;
if (get_user(inode->i_generation, (int __user *)arg)) {
--- linux-2.6.28-rc7-mm1.orig/fs/ubifs/ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/ubifs/ioctl.c
@@ -173,7 +173,7 @@ long ubifs_ioctl(struct file *file, unsi
* Make sure the file-system is read-write and make sure it
* will not become read-only while we are changing the flags.
*/
- err = mnt_want_write(file->f_path.mnt);
+ err = mnt_want_write(file->f_path.mnt, MNT_WANT_FOR_FS_IOCTL);
if (err)
return err;
err = setflags(inode, flags);
--- linux-2.6.28-rc7-mm1.orig/fs/utimes.c
+++ linux-2.6.28-rc7-mm1/fs/utimes.c
@@ -54,7 +54,7 @@ static int utimes_common(struct path *pa
struct iattr newattrs;
struct inode *inode = path->dentry->d_inode;

- error = mnt_want_write(path->mnt);
+ error = mnt_want_write(path->mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (error)
goto out;

--- linux-2.6.28-rc7-mm1.orig/fs/xattr.c
+++ linux-2.6.28-rc7-mm1/fs/xattr.c
@@ -261,7 +261,7 @@ sys_setxattr(const char __user *pathname
error = user_path(pathname, &path);
if (error)
return error;
- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (!error) {
error = setxattr(path.dentry, name, value, size, flags);
mnt_drop_write(path.mnt);
@@ -280,7 +280,7 @@ sys_lsetxattr(const char __user *pathnam
error = user_lpath(pathname, &path);
if (error)
return error;
- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (!error) {
error = setxattr(path.dentry, name, value, size, flags);
mnt_drop_write(path.mnt);
@@ -302,7 +302,7 @@ sys_fsetxattr(int fd, const char __user
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write(f->f_path.mnt);
+ error = mnt_want_write(f->f_path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (!error) {
error = setxattr(dentry, name, value, size, flags);
mnt_drop_write(f->f_path.mnt);
@@ -494,7 +494,7 @@ sys_removexattr(const char __user *pathn
error = user_path(pathname, &path);
if (error)
return error;
- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (!error) {
error = removexattr(path.dentry, name);
mnt_drop_write(path.mnt);
@@ -512,7 +512,7 @@ sys_lremovexattr(const char __user *path
error = user_lpath(pathname, &path);
if (error)
return error;
- error = mnt_want_write(path.mnt);
+ error = mnt_want_write(path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (!error) {
error = removexattr(path.dentry, name);
mnt_drop_write(path.mnt);
@@ -533,7 +533,7 @@ sys_fremovexattr(int fd, const char __us
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write(f->f_path.mnt);
+ error = mnt_want_write(f->f_path.mnt, MNT_WANT_FOR_UPDATE_ATTR);
if (!error) {
error = removexattr(dentry, name);
mnt_drop_write(f->f_path.mnt);
--- linux-2.6.28-rc7-mm1.orig/fs/xfs/linux-2.6/xfs_ioctl.c
+++ linux-2.6.28-rc7-mm1/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -629,7 +629,8 @@ xfs_attrmulti_by_handle(
&ops[i].am_length, ops[i].am_flags);
break;
case ATTR_OP_SET:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+ ops[i].am_error = mnt_want_write(parfilp->f_path.mnt,
+ MNT_WANT_FOR_FS_IOCTL);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_set(inode,
@@ -638,7 +639,8 @@ xfs_attrmulti_by_handle(
mnt_drop_write(parfilp->f_path.mnt);
break;
case ATTR_OP_REMOVE:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+ ops[i].am_error = mnt_want_write(parfilp->f_path.mnt,
+ MNT_WANT_FOR_FS_IOCTL);
if (ops[i].am_error)
break;
ops[i].am_error = xfs_attrmulti_attr_remove(inode,
--- linux-2.6.28-rc7-mm1.orig/fs/xfs/linux-2.6/xfs_lrw.c
+++ linux-2.6.28-rc7-mm1/fs/xfs/linux-2.6/xfs_lrw.c
@@ -673,7 +673,8 @@ start:
* filesystems. Throw it away if anyone asks us.
*/
if (likely(!(ioflags & IO_INVIS) &&
- !mnt_want_write(file->f_path.mnt))) {
+ !mnt_want_write(file->f_path.mnt,
+ MNT_WANT_FOR_UPDATE_ACMTIME))) {
xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
mnt_drop_write(file->f_path.mnt);
}
--- linux-2.6.28-rc7-mm1.orig/include/linux/mount.h
+++ linux-2.6.28-rc7-mm1/include/linux/mount.h
@@ -78,7 +78,22 @@ static inline struct vfsmount *mntget(st
return mnt;
}

-extern int mnt_want_write(struct vfsmount *mnt);
+enum mnt_want_reasons {
+ /* For subsequent VFS helper functions call. */
+ MNT_WANT_FOR_VFS_REQUEST, /* vfs_create()/vfs_mkdir() etc. */
+ /* For implicit write of inode's timestamps. */
+ MNT_WANT_FOR_UPDATE_ACMTIME, /* touch_atime()/file_update_time() */
+ /* For explicit write of inode's attributes and timestamps. */
+ MNT_WANT_FOR_UPDATE_ATTR, /* chmod()/chown()/utimes() etc. */
+ /* For filesystem's ioctl. */
+ MNT_WANT_FOR_FS_IOCTL, /* ext3_ioctl() etc. */
+ /* For opening a file for writing. */
+ MNT_WANT_FOR_OPEN_WRITE, /* __get_file_write_access() */
+ /* Needs to be granted since the caller doesn't check errors. */
+ MNT_WANT_ANYWAY /* init_file() */
+};
+
+extern int mnt_want_write(struct vfsmount *mnt, const int reason);
extern void mnt_drop_write(struct vfsmount *mnt);
extern void mntput_no_expire(struct vfsmount *mnt);
extern void mnt_pin(struct vfsmount *mnt);
--- linux-2.6.28-rc7-mm1.orig/ipc/mqueue.c
+++ linux-2.6.28-rc7-mm1/ipc/mqueue.c
@@ -611,7 +611,7 @@ static struct file *do_create(struct den
}

mode &= ~current->fs->umask;
- ret = mnt_want_write(mqueue_mnt);
+ ret = mnt_want_write(mqueue_mnt, MNT_WANT_FOR_VFS_REQUEST);
if (ret)
goto out;
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
@@ -753,7 +753,7 @@ asmlinkage long sys_mq_unlink(const char
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
- err = mnt_want_write(mqueue_mnt);
+ err = mnt_want_write(mqueue_mnt, MNT_WANT_FOR_VFS_REQUEST);
if (err)
goto out_err;
err = vfs_unlink(dentry->d_parent->d_inode, dentry);
--- linux-2.6.28-rc7-mm1.orig/net/unix/af_unix.c
+++ linux-2.6.28-rc7-mm1/net/unix/af_unix.c
@@ -833,7 +833,7 @@ static int unix_bind(struct socket *sock
*/
mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current->fs->umask);
- err = mnt_want_write(nd.path.mnt);
+ err = mnt_want_write(nd.path.mnt, MNT_WANT_FOR_VFS_REQUEST);
if (err)
goto out_mknod_dput;
err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);

2008-12-06 05:54:17

by Al Viro

[permalink] [raw]
Subject: Re: [RFC] Add "reason" parameter to mnt_want_write().

On Sat, Dec 06, 2008 at 02:25:01PM +0900, Tetsuo Handa wrote:
> We want to allow LSM modules to perform MAC which takes an absolute pathname of
> a requested file into account. Since we can't pass "struct vfsmount" to VFS
> helper functions, we are trying to somehow pass "struct vfsmount"'s pathnames
> instead of "struct vfsmount" itself.
>
> The mnt_want_write() and mnt_drop_write() hooks are inserted around VFS helper
> functions call. Thus, I think we can insert security_path_set() into
> mnt_want_write() and secuity_path_clear() into mnt_drop_write() rather than
> scattering security_path_set() and security_path_clear() all around the places.

No. Use separate set of hooks AND PASS vfsmount DIRECTLY TO THEM. Damnit,
people, just how many times does it have to be repeated?

Any version that pulls that class of tricks is no-go. I don't _CARE_ whether
you hide vfsmount in task struct, do the same with string, send yourself a
datagram over magic socket or mail it to kludges-R-US.webtv.com, downloading
it back in LSM hook.

It's not a problem with implementation; it's a problem with the kludge
itself *and* with having the effect of vfs_mkdir() et.al. dependent on
anything except the arguments it's getting.

Adding global context of that kind is every bit as wrong as passing vfsmount
(or absolute pathname, or...) to vfs_mkdir() and its ilk. It's worse,
actually, since it has an extra helping of ugliness on top of doing the
wrong thing.

2008-12-06 06:16:57

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH (mmotm-2008-12-02-17-08)] Introduce security_path_set/clear() hooks.

On Fri, Dec 05, 2008 at 04:53:18PM -0500, Stephen Smalley wrote:
> > Right. Locations of inserting security_path_set()/security_path_clear() pairs
> > are subset of mnt_want_write()/mnt_drop_write() pairs. Thus, we can insert
> > security_path_set()/security_path_clear() pairs into
> > mnt_want_write()/mnt_drop_write() pairs, if we can tolerate performance
> > regression. According to our rough measurement, there is about 8 - 22% of
> > performance regression. But this approach needs minimum modification to the
> > existing kernel (only two hooks to be inserted).
>
> I assume you also need separate hooks to cover the read-only open case?
> As for your performance, your implementation of mp_* is clearly
> non-optimal, so I'd expect there is plenty of room for improvement
> there.

And just what will happen if you end up with foo_mkdir() calling something
that does e.g. pathname resolution in fs-controlled private namespace and
creates/removes some files there?