2023-04-18 17:30:38

by Junxiao Bi

[permalink] [raw]
Subject: [PATCH V3] debugfs: allow access blktrace trace files in lockdown mode

blktrace trace files are per-cpu relay files that are used by kernel to
export IO metadata(IO events, type, target disk, offset and len etc.) to
userspace, no data from IO itself will be exported. These trace files have
permission 0400, but mmap is supported, so they are blocked by lockdown.
Skip lockdown for these files to allow blktrace work in lockdown mode.

v3 <- v2:
allow only blktrace trace file instead of relay files
https://lore.kernel.org/lkml/[email protected]/T/

v2 <- v1:
Fix build error when CONFIG_RELAY is not defined.
Reported-by: kernel test robot <[email protected]>
Link: https://lore.kernel.org/oe-kbuild-all/[email protected]/
Signed-off-by: Junxiao Bi <[email protected]>
---
fs/debugfs/file.c | 10 ++++++++++
include/linux/blktrace_api.h | 3 +++
include/linux/relay.h | 3 +++
kernel/relay.c | 16 ++++++++++++++++
kernel/trace/blktrace.c | 7 +++++++
5 files changed, 39 insertions(+)

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 1f971c880dde..973e38f3e8a1 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -21,6 +21,7 @@
#include <linux/pm_runtime.h>
#include <linux/poll.h>
#include <linux/security.h>
+#include <linux/blktrace_api.h>

#include "internal.h"

@@ -142,6 +143,12 @@ EXPORT_SYMBOL_GPL(debugfs_file_put);
* Only permit access to world-readable files when the kernel is locked down.
* We also need to exclude any file that has ways to write or alter it as root
* can bypass the permissions check.
+ * Exception:
+ * blktrace trace files are per-cpu relay files that are used by kernel to
+ * export IO metadata(IO events, type, target disk, offset and len etc.) to
+ * userspace, no data from IO itself will be exported. These trace files have
+ * permission 0400, but mmap is supported, so they are blocked by lockdown.
+ * Skip lockdown for these files to allow blktrace work in lockdown mode.
*/
static int debugfs_locked_down(struct inode *inode,
struct file *filp,
@@ -154,6 +161,9 @@ static int debugfs_locked_down(struct inode *inode,
!real_fops->mmap)
return 0;

+ if (blk_trace_is_tracefile(inode, real_fops))
+ return 0;
+
if (security_locked_down(LOCKDOWN_DEBUGFS))
return -EPERM;

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cfbda114348c..42db54434d7a 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -78,6 +78,8 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
char __user *arg);
extern int blk_trace_startstop(struct request_queue *q, int start);
extern int blk_trace_remove(struct request_queue *q);
+extern bool blk_trace_is_tracefile(struct inode *inode,
+ const struct file_operations *fops);

#else /* !CONFIG_BLK_DEV_IO_TRACE */
# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
@@ -89,6 +91,7 @@ extern int blk_trace_remove(struct request_queue *q);
# define blk_add_trace_msg(q, fmt, ...) do { } while (0)
# define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0)
# define blk_trace_note_message_enabled(q) (false)
+# define blk_trace_is_tracefile(inode, fops) (false)
#endif /* CONFIG_BLK_DEV_IO_TRACE */

#ifdef CONFIG_COMPAT
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 72b876dd5cb8..2346137adc94 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -279,8 +279,11 @@ extern const struct file_operations relay_file_operations;

#ifdef CONFIG_RELAY
int relay_prepare_cpu(unsigned int cpu);
+extern const struct rchan_callbacks *relay_get_cb(struct inode *inode,
+ const struct file_operations *fops);
#else
#define relay_prepare_cpu NULL
+#define relay_get_cb(inode, fops) NULL
#endif

#endif /* _LINUX_RELAY_H */
diff --git a/kernel/relay.c b/kernel/relay.c
index 9aa70ae53d24..c97ade998311 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1234,6 +1234,22 @@ static ssize_t relay_file_splice_read(struct file *in,
return ret;
}

+const struct rchan_callbacks *relay_get_cb(struct inode *inode,
+ const struct file_operations *fops)
+{
+ struct rchan_buf *buf;
+
+ /* Not a relay file */
+ if (fops != &relay_file_operations)
+ return NULL;
+ buf = (struct rchan_buf *)inode->i_private;
+ if (buf && buf->chan)
+ return buf->chan->cb;
+ else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(relay_get_cb);
+
const struct file_operations relay_file_operations = {
.open = relay_file_open,
.poll = relay_file_poll,
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d5d94510afd3..67e06a65d552 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -639,6 +639,13 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
return 0;
}

+bool blk_trace_is_tracefile(struct inode *inode,
+ const struct file_operations *fops)
+{
+ return relay_get_cb(inode, fops) == &blk_relay_callbacks;
+}
+EXPORT_SYMBOL_GPL(blk_trace_is_tracefile);
+
int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct block_device *bdev,
char __user *arg)
--
2.24.3 (Apple Git-128)


2023-04-19 18:06:24

by Paul Moore

[permalink] [raw]
Subject: Re: [PATCH V3] debugfs: allow access blktrace trace files in lockdown mode

On Tue, Apr 18, 2023 at 1:27 PM Junxiao Bi <[email protected]> wrote:
>
> blktrace trace files are per-cpu relay files that are used by kernel to
> export IO metadata(IO events, type, target disk, offset and len etc.) to
> userspace, no data from IO itself will be exported. These trace files have
> permission 0400, but mmap is supported, so they are blocked by lockdown.
> Skip lockdown for these files to allow blktrace work in lockdown mode.
>
> v3 <- v2:
> allow only blktrace trace file instead of relay files
> https://lore.kernel.org/lkml/[email protected]/T/
>
> v2 <- v1:
> Fix build error when CONFIG_RELAY is not defined.
> Reported-by: kernel test robot <[email protected]>
> Link: https://lore.kernel.org/oe-kbuild-all/[email protected]/
> Signed-off-by: Junxiao Bi <[email protected]>
> ---
> fs/debugfs/file.c | 10 ++++++++++
> include/linux/blktrace_api.h | 3 +++
> include/linux/relay.h | 3 +++
> kernel/relay.c | 16 ++++++++++++++++
> kernel/trace/blktrace.c | 7 +++++++
> 5 files changed, 39 insertions(+)
>
> diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
> index 1f971c880dde..973e38f3e8a1 100644
> --- a/fs/debugfs/file.c
> +++ b/fs/debugfs/file.c
> @@ -21,6 +21,7 @@
> #include <linux/pm_runtime.h>
> #include <linux/poll.h>
> #include <linux/security.h>
> +#include <linux/blktrace_api.h>
>
> #include "internal.h"
>
> @@ -142,6 +143,12 @@ EXPORT_SYMBOL_GPL(debugfs_file_put);
> * Only permit access to world-readable files when the kernel is locked down.
> * We also need to exclude any file that has ways to write or alter it as root
> * can bypass the permissions check.
> + * Exception:
> + * blktrace trace files are per-cpu relay files that are used by kernel to
> + * export IO metadata(IO events, type, target disk, offset and len etc.) to
> + * userspace, no data from IO itself will be exported. These trace files have
> + * permission 0400, but mmap is supported, so they are blocked by lockdown.
> + * Skip lockdown for these files to allow blktrace work in lockdown mode.
> */
> static int debugfs_locked_down(struct inode *inode,
> struct file *filp,
> @@ -154,6 +161,9 @@ static int debugfs_locked_down(struct inode *inode,
> !real_fops->mmap)
> return 0;
>
> + if (blk_trace_is_tracefile(inode, real_fops))
> + return 0;

I think it would be a little more foolproof if we made the connection
to lockdown a bit more explicit in the relay/blktrace code. How about
something like this here, borrowing your previously defined
'is_relay_file()' function:

if (is_relay_file(real_fops) && relay_bypass_lockdown(inode, real_fops))
return 0;

... and in the relay code we would have something like this, borrowing
from your logic in this patch, and using some shortcut-y pseudo-code:

bool relay_bypass_lockdown(struct inode *inode,
const struct file_operations *fops)
{
struct rchan_buf *buf = inode->i_private;

if (buf->chan->cb->bypass_lockdown)
return buf->chan->cb->bypass_lockdown(inode);

return false;
}

... where in the case of blktrace rchan_callbacks::bypass_lockdown
would be a simple "true", assuming it is safe to do so (we need some
ACKs from the blktrace folks):

bool blk_trace_bypass_lockdown(struct inode *inode)
{
return true;
}

--
paul-moore.com

2023-04-20 15:22:11

by Junxiao Bi

[permalink] [raw]
Subject: Re: [PATCH V3] debugfs: allow access blktrace trace files in lockdown mode



> On Apr 19, 2023, at 11:00 AM, Paul Moore <[email protected]> wrote:
>
> On Tue, Apr 18, 2023 at 1:27 PM Junxiao Bi <[email protected]> wrote:
>>
>> blktrace trace files are per-cpu relay files that are used by kernel to
>> export IO metadata(IO events, type, target disk, offset and len etc.) to
>> userspace, no data from IO itself will be exported. These trace files have
>> permission 0400, but mmap is supported, so they are blocked by lockdown.
>> Skip lockdown for these files to allow blktrace work in lockdown mode.
>>
>> v3 <- v2:
>> allow only blktrace trace file instead of relay files
>> https://lore.kernel.org/lkml/[email protected]/T/
>>
>> v2 <- v1:
>> Fix build error when CONFIG_RELAY is not defined.
>> Reported-by: kernel test robot <[email protected]>
>> Link: https://lore.kernel.org/oe-kbuild-all/[email protected]/
>> Signed-off-by: Junxiao Bi <[email protected]>
>> ---
>> fs/debugfs/file.c | 10 ++++++++++
>> include/linux/blktrace_api.h | 3 +++
>> include/linux/relay.h | 3 +++
>> kernel/relay.c | 16 ++++++++++++++++
>> kernel/trace/blktrace.c | 7 +++++++
>> 5 files changed, 39 insertions(+)
>>
>> diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
>> index 1f971c880dde..973e38f3e8a1 100644
>> --- a/fs/debugfs/file.c
>> +++ b/fs/debugfs/file.c
>> @@ -21,6 +21,7 @@
>> #include <linux/pm_runtime.h>
>> #include <linux/poll.h>
>> #include <linux/security.h>
>> +#include <linux/blktrace_api.h>
>>
>> #include "internal.h"
>>
>> @@ -142,6 +143,12 @@ EXPORT_SYMBOL_GPL(debugfs_file_put);
>> * Only permit access to world-readable files when the kernel is locked down.
>> * We also need to exclude any file that has ways to write or alter it as root
>> * can bypass the permissions check.
>> + * Exception:
>> + * blktrace trace files are per-cpu relay files that are used by kernel to
>> + * export IO metadata(IO events, type, target disk, offset and len etc.) to
>> + * userspace, no data from IO itself will be exported. These trace files have
>> + * permission 0400, but mmap is supported, so they are blocked by lockdown.
>> + * Skip lockdown for these files to allow blktrace work in lockdown mode.
>> */
>> static int debugfs_locked_down(struct inode *inode,
>> struct file *filp,
>> @@ -154,6 +161,9 @@ static int debugfs_locked_down(struct inode *inode,
>> !real_fops->mmap)
>> return 0;
>>
>> + if (blk_trace_is_tracefile(inode, real_fops))
>> + return 0;
>
> I think it would be a little more foolproof if we made the connection
> to lockdown a bit more explicit in the relay/blktrace code. How about
> something like this here, borrowing your previously defined
> 'is_relay_file()' function:
>
> if (is_relay_file(real_fops) && relay_bypass_lockdown(inode, real_fops))
> return 0;
>
> ... and in the relay code we would have something like this, borrowing
> from your logic in this patch, and using some shortcut-y pseudo-code:
>
> bool relay_bypass_lockdown(struct inode *inode,
> const struct file_operations *fops)
> {
> struct rchan_buf *buf = inode->i_private;
>
> if (buf->chan->cb->bypass_lockdown)
> return buf->chan->cb->bypass_lockdown(inode);
>
> return false;
> }
>
> ... where in the case of blktrace rchan_callbacks::bypass_lockdown
> would be a simple "true", assuming it is safe to do so (we need some
> ACKs from the blktrace folks):
>
> bool blk_trace_bypass_lockdown(struct inode *inode)
> {
> return true;
> }
Good idea. Will make a new version for it. Thanks.
>
> --
> paul-moore.com