Hi,
This is a second RFC with an implementation to support superblock and
specifically ext4 notifications over the watch_queue interface, as
originally proposed by David Howells. The original cover letter
follows.
This version of the RFC introduces the design changes requested by Ted
on the previous version (thanks). It folds the _inode_error and
_inode_warning types into their error and warning counterparts. This
version also introduces a patch to samples/ exemplifying how the
interface can be used.
I'm still sending it as an RFC as I'd love to gather a bit more
feedback, before actually proposing it for merging.
Dave, can you comment on the changes to watch_queue and how it fits
your original watch_queue model?
The reasoning for this work, and some background can be found in the
cover letter below.
I also shared the patches at:
https://gitlab.collabora.com/krisman/linux.git
under the tag ext4-error-notifications_RFC-v2
Thanks,
---
Original cover letter:
Google has been using an out-of-tree mechanism for error notification in
Ext4 and we decided it is time to push for an upstream solution. This
would surely fit on top of David's notification work.
This patchset is an attempt to restart that discussion. It forward ports
some code from David on top of Linus' tree, adds features to
watch_queue and implements ext4 support.
The new notifications are modeled after ext4 messages, so they expose
notification types that fit that filesystem, but the needs of other
filesystems should not differ much, so it should be easily extensible.
I'm aware of the discussion around fsinfo, but I'd like to ask if there
are other missing pieces and what we could do to help that work go
upstream. From a previous mailing list discussion, Linus complained
about lack of users as a main reason for it to not be merged, so hey! :)
In addition, I'd like to ask for feedback on the current implementation,
specifically regarding the passing of extra unformatted information at
the end of the notification and the ext4 support.
The work, as shared in this patchset, can be found at:
https://gitlab.collabora.com/krisman/linux.git -b ext4-error-notifications
And there is an example code at:
https://gitlab.collabora.com/krisman/ext4-watcher
I'm Cc'ing Khazhismel Kumykov, from Google, who can provide more
information about their use case, if requested.
---
David Howells (3):
watch_queue: Make watch_sizeof() check record size
security: Add hooks to rule on setting a watch for superblock
vfs: Add superblock notifications
Gabriel Krisman Bertazi (5):
watch_queue: Support a text field at the end of the notification
vfs: Include origin of the SB error notification
fs: Add more superblock error subtypes
ext4: Implement SB error notification through watch_sb
samples: watch_queue: Add sample of SB notifications
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
fs/Kconfig | 12 ++
fs/ext4/super.c | 31 +++--
fs/super.c | 127 +++++++++++++++++++++
include/linux/fs.h | 150 +++++++++++++++++++++++++
include/linux/lsm_hook_defs.h | 1 +
include/linux/lsm_hooks.h | 4 +
include/linux/security.h | 13 +++
include/linux/syscalls.h | 2 +
include/linux/watch_queue.h | 21 +++-
include/uapi/asm-generic/unistd.h | 4 +-
include/uapi/linux/watch_queue.h | 54 ++++++++-
kernel/sys_ni.c | 3 +
kernel/watch_queue.c | 29 ++++-
samples/watch_queue/Makefile | 2 +-
samples/watch_queue/watch_sb.c | 114 +++++++++++++++++++
security/security.c | 6 +
18 files changed, 556 insertions(+), 19 deletions(-)
create mode 100644 samples/watch_queue/watch_sb.c
--
2.29.2
From: David Howells <[email protected]>
Add security hooks that will allow an LSM to rule on whether or not a watch
may be set for a superblock.
Signed-off-by: David Howells <[email protected]>
[Drop mount and key changes. Rebase to mainline]
Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
---
include/linux/lsm_hook_defs.h | 1 +
include/linux/lsm_hooks.h | 4 ++++
include/linux/security.h | 13 +++++++++++++
security/security.c | 6 ++++++
4 files changed, 24 insertions(+)
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 32a940117e7a..8fa8533598bc 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -261,6 +261,7 @@ LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx,
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
LSM_HOOK(int, 0, post_notification, const struct cred *w_cred,
const struct cred *cred, struct watch_notification *n)
+LSM_HOOK(int, 0, watch_sb, struct super_block *sb)
#endif /* CONFIG_SECURITY && CONFIG_WATCH_QUEUE */
#if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index c503f7ab8afb..11197bf167d3 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1475,6 +1475,10 @@
* @w_cred: The credentials of the whoever set the watch.
* @cred: The event-triggerer's credentials
* @n: The notification being posted
+ * @watch_sb:
+ * Check to see if a process is allowed to watch for event notifications
+ * from a superblock.
+ * @sb: The superblock to watch.
*
* @watch_key:
* Check to see if a process is allowed to watch for event notifications
diff --git a/include/linux/security.h b/include/linux/security.h
index bc2725491560..078e11a8872a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -456,6 +456,11 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
int security_locked_down(enum lockdown_reason what);
+
+#ifdef CONFIG_WATCH_QUEUE
+int security_watch_sb(struct super_block *sb);
+#endif /* CONFIG_WATCH_QUEUE */
+
#else /* CONFIG_SECURITY */
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
@@ -1304,6 +1309,14 @@ static inline int security_locked_down(enum lockdown_reason what)
{
return 0;
}
+
+#ifdef CONFIG_WATCH_QUEUE
+static inline int security_watch_sb(struct super_block *sb)
+{
+ return 0;
+}
+#endif /* CONFIG_WATCH_QUEUE */
+
#endif /* CONFIG_SECURITY */
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
diff --git a/security/security.c b/security/security.c
index a28045dc9e7f..a23a972063cd 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2074,6 +2074,12 @@ int security_post_notification(const struct cred *w_cred,
{
return call_int_hook(post_notification, 0, w_cred, cred, n);
}
+
+int security_watch_sb(struct super_block *sb)
+{
+ return call_int_hook(watch_sb, 0, sb);
+}
+
#endif /* CONFIG_WATCH_QUEUE */
#ifdef CONFIG_KEY_NOTIFICATIONS
--
2.29.2
This allows notifications to send text information to userspace without
having to copy it to a temporary buffer to then copy to the ring. One
use case to pass text information in notifications is for error
reporting, where more debug information might be needed, but we don't
want to explode the number of subtypes of notifications. For instance,
ext4 can have a single inode error notification subtype, and pass more
information on the cause of the error in this field.
Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
---
include/linux/watch_queue.h | 14 ++++++++++++--
kernel/watch_queue.c | 29 ++++++++++++++++++++++++-----
2 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index f1086d12cd03..2f5a7446bca6 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -79,7 +79,7 @@ struct watch_list {
extern void __post_watch_notification(struct watch_list *,
struct watch_notification *,
const struct cred *,
- u64);
+ u64, const char*, va_list*);
extern struct watch_queue *get_watch_queue(int);
extern void put_watch_queue(struct watch_queue *);
extern void init_watch(struct watch *, struct watch_queue *);
@@ -105,7 +105,17 @@ static inline void post_watch_notification(struct watch_list *wlist,
u64 id)
{
if (unlikely(wlist))
- __post_watch_notification(wlist, n, cred, id);
+ __post_watch_notification(wlist, n, cred, id, NULL, NULL);
+}
+
+static inline void post_watch_notification_string(struct watch_list *wlist,
+ struct watch_notification *n,
+ const struct cred *cred,
+ u64 id, const char *fmt,
+ va_list *args)
+{
+ if (unlikely(wlist))
+ __post_watch_notification(wlist, n, cred, id, fmt, args);
}
static inline void remove_watch_list(struct watch_list *wlist, u64 id)
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 0ef8f65bd2d7..89fcf0420ce7 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -70,13 +70,15 @@ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = {
* Post a notification to a watch queue.
*/
static bool post_one_notification(struct watch_queue *wqueue,
- struct watch_notification *n)
+ struct watch_notification *n,
+ const char *fmt, va_list *args)
{
void *p;
struct pipe_inode_info *pipe = wqueue->pipe;
struct pipe_buffer *buf;
struct page *page;
unsigned int head, tail, mask, note, offset, len;
+ int wlen = 0;
bool done = false;
if (!pipe)
@@ -102,6 +104,23 @@ static bool post_one_notification(struct watch_queue *wqueue,
get_page(page);
len = n->info & WATCH_INFO_LENGTH;
p = kmap_atomic(page);
+ /*
+ * Write the tail description before the actual header, because
+ * the string needs to be generated to calculate the final
+ * notification size, that is passed in the header.
+ */
+ if (fmt) {
+ wlen = vscnprintf(p + offset + len, WATCH_INFO_LENGTH - len,
+ fmt, (args ? *args : NULL));
+ wlen += 1; /* vscnprintf doesn't include '\0' */
+ if (wlen > 0) {
+ n->info = n->info & ~WATCH_INFO_LENGTH;
+ n->info |= (len + wlen) & WATCH_INFO_LENGTH;
+ } else {
+ /* Drop errors when writing the extra string. */
+ wlen = 0;
+ }
+ }
memcpy(p + offset, n, len);
kunmap_atomic(p);
@@ -110,7 +129,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
buf->private = (unsigned long)wqueue;
buf->ops = &watch_queue_pipe_buf_ops;
buf->offset = offset;
- buf->len = len;
+ buf->len = (len + wlen);
buf->flags = PIPE_BUF_FLAG_WHOLE;
pipe->head = head + 1;
@@ -175,7 +194,7 @@ static bool filter_watch_notification(const struct watch_filter *wf,
void __post_watch_notification(struct watch_list *wlist,
struct watch_notification *n,
const struct cred *cred,
- u64 id)
+ u64 id, const char *fmt, va_list *args)
{
const struct watch_filter *wf;
struct watch_queue *wqueue;
@@ -202,7 +221,7 @@ void __post_watch_notification(struct watch_list *wlist,
if (security_post_notification(watch->cred, cred, n) < 0)
continue;
- post_one_notification(wqueue, n);
+ post_one_notification(wqueue, n, fmt, args);
}
rcu_read_unlock();
@@ -522,7 +541,7 @@ int remove_watch_from_object(struct watch_list *wlist, struct watch_queue *wq,
* protecting *wqueue from deallocation.
*/
if (wqueue) {
- post_one_notification(wqueue, &n.watch);
+ post_one_notification(wqueue, &n.watch, NULL, NULL);
spin_lock_bh(&wqueue->lock);
--
2.29.2
From: David Howells <[email protected]>
Add a superblock event notification facility whereby notifications about
superblock events, such as I/O errors (EIO), quota limits being hit
(EDQUOT) and running out of space (ENOSPC) can be reported to a monitoring
process asynchronously. Note that this does not cover vfsmount topology
changes. watch_mount() is used for that.
Records are of the following format:
struct superblock_notification {
struct watch_notification watch;
__u64 sb_id;
} *n;
Where:
n->watch.type will be WATCH_TYPE_SB_NOTIFY.
n->watch.subtype will indicate the type of event, such as
NOTIFY_SUPERBLOCK_READONLY.
n->watch.info & WATCH_INFO_LENGTH will indicate the length of the
record.
n->watch.info & WATCH_INFO_ID will be the fifth argument to
watch_sb(), shifted.
n->watch.info & NOTIFY_SUPERBLOCK_IS_NOW_RO will be used for
NOTIFY_SUPERBLOCK_READONLY, being set if the superblock becomes
R/O, and being cleared otherwise.
n->sb_id will be the ID of the superblock, as can be retrieved with
the fsinfo() syscall, as part of the fsinfo_sb_notifications
attribute in the watch_id field.
Note that it is permissible for event records to be of variable length -
or, at least, the length may be dependent on the subtype. Note also that
the queue can be shared between multiple notifications of various types.
Signed-off-by: David Howells <[email protected]>
[Rebase to mainline. Expose inode and block on sb_error.
Update API and commit message]
Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
---
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
fs/Kconfig | 12 +++
fs/super.c | 127 +++++++++++++++++++++++++
include/linux/fs.h | 87 +++++++++++++++++
include/linux/syscalls.h | 2 +
include/uapi/asm-generic/unistd.h | 4 +-
include/uapi/linux/watch_queue.h | 34 ++++++-
kernel/sys_ni.c | 3 +
9 files changed, 269 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 0d0667a9fbd7..c481ab8c4454 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -445,3 +445,4 @@
438 i386 pidfd_getfd sys_pidfd_getfd
439 i386 faccessat2 sys_faccessat2
440 i386 process_madvise sys_process_madvise
+441 i386 watch_sb sys_watch_sb
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 379819244b91..87efe2577169 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -362,6 +362,7 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
+441 common watch_sb sys_watch_sb
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/fs/Kconfig b/fs/Kconfig
index aa4c12282301..4e96521c37a1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -117,6 +117,18 @@ source "fs/verity/Kconfig"
source "fs/notify/Kconfig"
+config SB_NOTIFICATIONS
+ bool "Superblock event notifications"
+ select WATCH_QUEUE
+ help
+ This option provides support for receiving superblock event
+ notifications. This makes use of the watch_queue API to
+ handle the notification buffer and provides the watch_sb()
+ system call to enable/disable watches.
+
+ Events can include things like changing between R/W and R/O, EIO
+ generation, ENOSPC generation and EDQUOT generation.
+
source "fs/quota/Kconfig"
source "fs/autofs/Kconfig"
diff --git a/fs/super.c b/fs/super.c
index a51c2083cd6b..8178a595a336 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,8 @@
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include <linux/fs_context.h>
+#include <linux/syscalls.h>
+#include <linux/namei.h>
#include <uapi/linux/mount.h>
#include "internal.h"
@@ -330,6 +332,10 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (s->s_watchers)
+ remove_watch_list(s->s_watchers, s->s_unique_id);
+#endif
cleancache_invalidate_fs(s);
unregister_shrinker(&s->s_shrink);
fs->kill_sb(s);
@@ -969,6 +975,8 @@ int reconfigure_super(struct fs_context *fc)
/* Needs to be ordered wrt mnt_is_readonly() */
smp_wmb();
sb->s_readonly_remount = 0;
+ notify_sb(sb, NOTIFY_SUPERBLOCK_READONLY,
+ remount_ro ? NOTIFY_SUPERBLOCK_IS_NOW_RO : 0);
/*
* Some filesystems modify their metadata via some other path than the
@@ -1867,3 +1875,122 @@ int thaw_super(struct super_block *sb)
return thaw_super_locked(sb);
}
EXPORT_SYMBOL(thaw_super);
+
+#ifdef CONFIG_SB_NOTIFICATIONS
+/*
+ * Post superblock notifications.
+ */
+
+void post_sb_notification(struct super_block *s, struct superblock_notification *n,
+ const char *fmt, va_list *args)
+{
+ post_watch_notification_string(s->s_watchers, &n->watch, current_cred(),
+ s->s_unique_id, fmt, args);
+}
+
+/**
+ * sys_watch_sb - Watch for superblock events.
+ * @dfd: Base directory to pathwalk from or fd referring to superblock.
+ * @filename: Path to superblock to place the watch upon
+ * @at_flags: Pathwalk control flags
+ * @watch_fd: The watch queue to send notifications to.
+ * @watch_id: The watch ID to be placed in the notification (-1 to remove watch)
+ */
+SYSCALL_DEFINE5(watch_sb,
+ int, dfd,
+ const char __user *, filename,
+ unsigned int, at_flags,
+ int, watch_fd,
+ int, watch_id)
+{
+ struct watch_queue *wqueue;
+ struct super_block *s;
+ struct watch_list *wlist = NULL;
+ struct watch *watch = NULL;
+ struct path path;
+ unsigned int lookup_flags =
+ LOOKUP_DIRECTORY | LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
+ int ret;
+
+ if (watch_id < -1 || watch_id > 0xff)
+ return -EINVAL;
+ if ((at_flags & ~(AT_NO_AUTOMOUNT | AT_EMPTY_PATH)) != 0)
+ return -EINVAL;
+ if (at_flags & AT_NO_AUTOMOUNT)
+ lookup_flags &= ~LOOKUP_AUTOMOUNT;
+ if (at_flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(dfd, filename, at_flags, &path);
+ if (ret)
+ return ret;
+
+ ret = inode_permission(path.dentry->d_inode, MAY_EXEC);
+ if (ret)
+ goto err_path;
+
+ wqueue = get_watch_queue(watch_fd);
+ if (IS_ERR(wqueue))
+ goto err_path;
+
+ s = path.dentry->d_sb;
+ if (watch_id >= 0) {
+ ret = -ENOMEM;
+ if (!s->s_watchers) {
+ wlist = kzalloc(sizeof(*wlist), GFP_KERNEL);
+ if (!wlist)
+ goto err_wqueue;
+ init_watch_list(wlist, NULL);
+ }
+
+ watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+ if (!watch)
+ goto err_wlist;
+
+ init_watch(watch, wqueue);
+ watch->id = s->s_unique_id;
+ watch->private = s;
+ watch->info_id = (u32)watch_id << 24;
+
+ ret = security_watch_sb(s);
+ if (ret < 0)
+ goto err_watch;
+
+ down_write(&s->s_umount);
+ ret = -EIO;
+ if (atomic_read(&s->s_active)) {
+ if (!s->s_watchers) {
+ s->s_watchers = wlist;
+ wlist = NULL;
+ }
+
+ ret = add_watch_to_object(watch, s->s_watchers);
+ if (ret == 0) {
+ spin_lock(&sb_lock);
+ s->s_count++;
+ spin_unlock(&sb_lock);
+ watch = NULL;
+ }
+ }
+ up_write(&s->s_umount);
+ } else {
+ ret = -EBADSLT;
+ if (READ_ONCE(s->s_watchers)) {
+ down_write(&s->s_umount);
+ ret = remove_watch_from_object(s->s_watchers, wqueue,
+ s->s_unique_id, false);
+ up_write(&s->s_umount);
+ }
+ }
+
+err_watch:
+ kfree(watch);
+err_wlist:
+ kfree(wlist);
+err_wqueue:
+ put_watch_queue(wqueue);
+err_path:
+ path_put(&path);
+ return ret;
+}
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21cc971fd960..cf5245f414c2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -39,6 +39,7 @@
#include <linux/fs_types.h>
#include <linux/build_bug.h>
#include <linux/stddef.h>
+#include <linux/watch_queue.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1547,6 +1548,13 @@ struct super_block {
spinlock_t s_inode_wblist_lock;
struct list_head s_inodes_wb; /* writeback inodes */
+
+ /* Superblock event notifications */
+ u64 s_unique_id;
+
+#ifdef CONFIG_SB_NOTIFICATIONS
+ struct watch_list *s_watchers;
+#endif
} __randomize_layout;
/* Helper functions so that in most cases filesystems will
@@ -3460,4 +3468,83 @@ static inline int inode_drain_writes(struct inode *inode)
return filemap_write_and_wait(inode->i_mapping);
}
+extern void post_sb_notification(struct super_block *, struct superblock_notification *,
+ const char *fmt, va_list *args);
+/**
+ * notify_sb: Post simple superblock notification.
+ * @s: The superblock the notification is about.
+ * @subtype: The type of notification.
+ * @info: WATCH_INFO_FLAG_* flags to be set in the record.
+ */
+static inline void notify_sb(struct super_block *s,
+ enum superblock_notification_type subtype,
+ u32 info)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_notification n = {
+ .watch.type = WATCH_TYPE_SB_NOTIFY,
+ .watch.subtype = subtype,
+ .watch.info = watch_sizeof(n) | info,
+ .sb_id = s->s_unique_id,
+ };
+
+ post_sb_notification(s, &n, NULL, NULL);
+ }
+
+#endif
+}
+
+/**
+ * notify_sb_error: Post superblock error notification.
+ * @s: The superblock the notification is about.
+ * @error: The error number to be recorded.
+ * @inode: The inode the error refers to (if available, 0 otherwise)
+ * @block: The block the error refers to (if available, 0 otherwise)
+ * @fmt: Formating string for extra information appended to the notification
+ * @args: arguments for extra information string appended to the notification
+ */
+static inline int notify_sb_error(struct super_block *s, int error, u64 inode,
+ u64 block, const char *fmt, va_list *args)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_error_notification n = {
+ .s.watch.type = WATCH_TYPE_SB_NOTIFY,
+ .s.watch.subtype = NOTIFY_SUPERBLOCK_ERROR,
+ .s.watch.info = watch_sizeof(n),
+ .s.sb_id = s->s_unique_id,
+ .error_number = error,
+ .error_cookie = 0,
+ .inode = inode,
+ .block = block,
+ };
+
+ post_sb_notification(s, &n.s, fmt, args);
+ }
+#endif
+ return error;
+}
+
+/**
+ * notify_sb_EDQUOT: Post superblock quota overrun notification.
+ * @s: The superblock the notification is about.
+ */
+static inline int notify_sb_EQDUOT(struct super_block *s)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_notification n = {
+ .watch.type = WATCH_TYPE_SB_NOTIFY,
+ .watch.subtype = NOTIFY_SUPERBLOCK_EDQUOT,
+ .watch.info = watch_sizeof(n),
+ .sb_id = s->s_unique_id,
+ };
+
+ post_sb_notification(s, &n, NULL, NULL);
+ }
+#endif
+ return -EDQUOT;
+}
+
#endif /* _LINUX_FS_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 37bea07c12f2..5f7b282d331d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1008,6 +1008,8 @@ asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
siginfo_t __user *info,
unsigned int flags);
asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags);
+asmlinkage long sys_watch_sb(int dfd, const char __user *path,
+ unsigned int at_flags, int watch_fd, int watch_id);
/*
* Architecture-specific system calls
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 2056318988f7..5eec69e2b312 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
__SYSCALL(__NR_faccessat2, sys_faccessat2)
#define __NR_process_madvise 440
__SYSCALL(__NR_process_madvise, sys_process_madvise)
+#define __NR_watch_sb 441
+__SYSCALL(__NR_watch_sb, sys_watch_sb)
#undef __NR_syscalls
-#define __NR_syscalls 441
+#define __NR_syscalls 442
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h
index c3d8320b5d3a..937363d9f7b3 100644
--- a/include/uapi/linux/watch_queue.h
+++ b/include/uapi/linux/watch_queue.h
@@ -14,7 +14,8 @@
enum watch_notification_type {
WATCH_TYPE_META = 0, /* Special record */
WATCH_TYPE_KEY_NOTIFY = 1, /* Key change event notification */
- WATCH_TYPE__NR = 2
+ WATCH_TYPE_SB_NOTIFY = 2,
+ WATCH_TYPE__NR = 3
};
enum watch_meta_notification_subtype {
@@ -101,4 +102,35 @@ struct key_notification {
__u32 aux; /* Per-type auxiliary data */
};
+/*
+ * Type of superblock notification.
+ */
+enum superblock_notification_type {
+ NOTIFY_SUPERBLOCK_READONLY = 0, /* Filesystem toggled between R/O and R/W */
+ NOTIFY_SUPERBLOCK_ERROR = 1, /* Error in filesystem or blockdev */
+ NOTIFY_SUPERBLOCK_EDQUOT = 2, /* EDQUOT notification */
+ NOTIFY_SUPERBLOCK_NETWORK = 3, /* Network status change */
+};
+
+#define NOTIFY_SUPERBLOCK_IS_NOW_RO WATCH_INFO_FLAG_0 /* Superblock changed to R/O */
+
+/*
+ * Superblock notification record.
+ * - watch.type = WATCH_TYPE_MOUNT_NOTIFY
+ * - watch.subtype = enum superblock_notification_subtype
+ */
+struct superblock_notification {
+ struct watch_notification watch; /* WATCH_TYPE_SB_NOTIFY */
+ __u64 sb_id; /* 64-bit superblock ID [fsinfo_ids::f_sb_id] */
+};
+
+struct superblock_error_notification {
+ struct superblock_notification s; /* subtype = notify_superblock_error */
+ __u32 error_number;
+ __u32 error_cookie;
+ __u64 inode;
+ __u64 block;
+ char desc[0];
+};
+
#endif /* _UAPI_LINUX_WATCH_QUEUE_H */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index f27ac94d5fa7..3e97984bc4c8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -51,6 +51,9 @@ COND_SYSCALL_COMPAT(io_pgetevents);
COND_SYSCALL(io_uring_setup);
COND_SYSCALL(io_uring_enter);
COND_SYSCALL(io_uring_register);
+COND_SYSCALL(fsinfo);
+COND_SYSCALL(watch_mount);
+COND_SYSCALL(watch_sb);
/* fs/xattr.c */
--
2.29.2
When reporting a filesystem error, we really need to know where the
error came from, therefore, include "function:line" information in the
notification sent to userspace. There are currently no users of
notify_sb_error() in the kernel, so there are no callers to update.
Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
---
include/linux/fs.h | 11 +++++++++--
include/uapi/linux/watch_queue.h | 3 +++
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cf5245f414c2..81aaa673ada7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3498,14 +3498,17 @@ static inline void notify_sb(struct super_block *s,
/**
* notify_sb_error: Post superblock error notification.
* @s: The superblock the notification is about.
+ * @function: function name reported as source of the error.
+ * @line: source code line reported as source of the error.
* @error: The error number to be recorded.
* @inode: The inode the error refers to (if available, 0 otherwise)
* @block: The block the error refers to (if available, 0 otherwise)
* @fmt: Formating string for extra information appended to the notification
* @args: arguments for extra information string appended to the notification
*/
-static inline int notify_sb_error(struct super_block *s, int error, u64 inode,
- u64 block, const char *fmt, va_list *args)
+static inline int notify_sb_error(struct super_block *s, const char *function, int line,
+ int error, u64 inode, u64 block,
+ const char *fmt, va_list *args)
{
#ifdef CONFIG_SB_NOTIFICATIONS
if (unlikely(s->s_watchers)) {
@@ -3518,8 +3521,12 @@ static inline int notify_sb_error(struct super_block *s, int error, u64 inode,
.error_cookie = 0,
.inode = inode,
.block = block,
+ .line = line,
};
+ memcpy(&n.function, function, SB_NOTIFICATION_FNAME_LEN);
+ n.function[SB_NOTIFICATION_FNAME_LEN-1] = '\0';
+
post_sb_notification(s, &n.s, fmt, args);
}
#endif
diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h
index 937363d9f7b3..5fa5286c5cc7 100644
--- a/include/uapi/linux/watch_queue.h
+++ b/include/uapi/linux/watch_queue.h
@@ -114,6 +114,7 @@ enum superblock_notification_type {
#define NOTIFY_SUPERBLOCK_IS_NOW_RO WATCH_INFO_FLAG_0 /* Superblock changed to R/O */
+#define SB_NOTIFICATION_FNAME_LEN 30
/*
* Superblock notification record.
* - watch.type = WATCH_TYPE_MOUNT_NOTIFY
@@ -130,6 +131,8 @@ struct superblock_error_notification {
__u32 error_cookie;
__u64 inode;
__u64 block;
+ char function[SB_NOTIFICATION_FNAME_LEN];
+ __u16 line;
char desc[0];
};
--
2.29.2
Expose new SB notification subtypes for warnings, errors and general
messages. This is modeled after the information exposed by ext4, but
should be the same for other filesystems.
Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
---
include/linux/fs.h | 56 ++++++++++++++++++++++++++++++++
include/uapi/linux/watch_queue.h | 17 ++++++++++
2 files changed, 73 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 81aaa673ada7..9c241689d8bc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3533,6 +3533,62 @@ static inline int notify_sb_error(struct super_block *s, const char *function, i
return error;
}
+/**
+ * notify_sb_warning: Post superblock warning notification.
+ * @s: The superblock the notification is about.
+ * @function: function name reported as source of the warning.
+ * @line: source code line reported as source of the warning.
+ * @inode: The inode the error refers to (if available, 0 otherwise)
+ * @block: The block the error refers to (if available, 0 otherwise)
+ * @fmt: Formating string for extra information appended to the notification
+ * @args: arguments for extra information string appended to the notification
+ */
+static inline void notify_sb_warning(struct super_block *s, const char *function,
+ int line, u64 inode, u64 block,
+ const char *fmt, va_list *args)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_error_notification n = {
+ .s.watch.type = WATCH_TYPE_SB_NOTIFY,
+ .s.watch.subtype = NOTIFY_SUPERBLOCK_WARNING,
+ .s.watch.info = watch_sizeof(n),
+ .s.sb_id = s->s_unique_id,
+ .inode = inode,
+ .block = block,
+ .line = line,
+ };
+
+ memcpy(&n.function, function, SB_NOTIFICATION_FNAME_LEN);
+ n.function[SB_NOTIFICATION_FNAME_LEN-1] = '\0';
+
+ post_sb_notification(s, &n.s, fmt, args);
+ }
+#endif
+}
+
+/**
+ * notify_sb_msg: Post superblock message.
+ * @s: The superblock the notification is about.
+ * @fmt: Formating string for extra information appended to the notification
+ * @args: arguments for extra information string appended to the notification
+ */
+static inline void notify_sb_msg(struct super_block *s, const char *fmt, va_list *args)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_msg_notification n = {
+ .s.watch.type = WATCH_TYPE_SB_NOTIFY,
+ .s.watch.subtype = NOTIFY_SUPERBLOCK_MSG,
+ .s.watch.info = watch_sizeof(n),
+ .s.sb_id = s->s_unique_id,
+ };
+
+ post_sb_notification(s, &n.s, fmt, args);
+ }
+#endif
+}
+
/**
* notify_sb_EDQUOT: Post superblock quota overrun notification.
* @s: The superblock the notification is about.
diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h
index 5fa5286c5cc7..c4afd545e234 100644
--- a/include/uapi/linux/watch_queue.h
+++ b/include/uapi/linux/watch_queue.h
@@ -110,6 +110,9 @@ enum superblock_notification_type {
NOTIFY_SUPERBLOCK_ERROR = 1, /* Error in filesystem or blockdev */
NOTIFY_SUPERBLOCK_EDQUOT = 2, /* EDQUOT notification */
NOTIFY_SUPERBLOCK_NETWORK = 3, /* Network status change */
+ NOTIFY_SUPERBLOCK_MSG = 4, /* Filesystem message */
+ NOTIFY_SUPERBLOCK_WARNING = 5, /* Filesystem warning */
+
};
#define NOTIFY_SUPERBLOCK_IS_NOW_RO WATCH_INFO_FLAG_0 /* Superblock changed to R/O */
@@ -136,4 +139,18 @@ struct superblock_error_notification {
char desc[0];
};
+struct superblock_msg_notification {
+ struct superblock_notification s; /* subtype = notify_superblock_msg */
+ char desc[0];
+};
+
+struct superblock_warning_notification {
+ struct superblock_notification s; /* subtype = notify_superblock_warning */
+ __u64 inode;
+ __u64 block;
+ char function[SB_NOTIFICATION_FNAME_LEN];
+ __u16 line;
+ char desc[0];
+};
+
#endif /* _UAPI_LINUX_WATCH_QUEUE_H */
--
2.29.2
This follows the same implementation of ext4 error reporting via dmesg,
but exposes that information via the new watch_queue notifications API.
Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
---
fs/ext4/super.c | 31 +++++++++++++++++++++++--------
1 file changed, 23 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c3b864588a0b..58dc1e48b683 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -724,15 +724,17 @@ void __ext4_error(struct super_block *sb, const char *function,
return;
trace_ext4_error(sb, function, line);
+ va_start(args, fmt);
if (ext4_error_ratelimit(sb)) {
- va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_CRIT
"EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
sb->s_id, function, line, current->comm, &vaf);
- va_end(args);
}
+ notify_sb_error(sb, function, line, error, 0, 0, fmt, &args);
+ va_end(args);
+
save_error_info(sb, error, 0, block, function, line);
ext4_handle_error(sb);
}
@@ -748,8 +750,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
return;
trace_ext4_error(inode->i_sb, function, line);
+ va_start(args, fmt);
if (ext4_error_ratelimit(inode->i_sb)) {
- va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
if (block)
@@ -762,8 +764,11 @@ void __ext4_error_inode(struct inode *inode, const char *function,
"inode #%lu: comm %s: %pV\n",
inode->i_sb->s_id, function, line, inode->i_ino,
current->comm, &vaf);
- va_end(args);
}
+ notify_sb_error(inode->i_sb, function, line, error, inode->i_ino, block,
+ fmt, &args);
+ va_end(args);
+
save_error_info(inode->i_sb, error, inode->i_ino, block,
function, line);
ext4_handle_error(inode->i_sb);
@@ -782,11 +787,11 @@ void __ext4_error_file(struct file *file, const char *function,
return;
trace_ext4_error(inode->i_sb, function, line);
+ va_start(args, fmt);
if (ext4_error_ratelimit(inode->i_sb)) {
path = file_path(file, pathname, sizeof(pathname));
if (IS_ERR(path))
path = "(unknown)";
- va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
if (block)
@@ -801,8 +806,11 @@ void __ext4_error_file(struct file *file, const char *function,
"comm %s: path %s: %pV\n",
inode->i_sb->s_id, function, line, inode->i_ino,
current->comm, path, &vaf);
- va_end(args);
}
+ notify_sb_error(inode->i_sb, function, line, EFSCORRUPTED,
+ inode->i_ino, block, fmt, &args);
+ va_end(args);
+
save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
function, line);
ext4_handle_error(inode->i_sb);
@@ -872,6 +880,8 @@ void __ext4_std_error(struct super_block *sb, const char *function,
sb->s_id, function, line, errstr);
}
+ notify_sb_error(sb, function, line, errno, 0, 0, errstr, NULL);
+
save_error_info(sb, -errno, 0, 0, function, line);
ext4_handle_error(sb);
}
@@ -901,6 +911,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
sb->s_id, function, line, &vaf);
+ notify_sb_error(sb, function, line, error, 0, 0, fmt, &args);
va_end(args);
if (sb_rdonly(sb) == 0) {
@@ -934,6 +945,7 @@ void __ext4_msg(struct super_block *sb,
vaf.fmt = fmt;
vaf.va = &args;
printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+ notify_sb_msg(sb, fmt, &args);
va_end(args);
}
@@ -958,6 +970,7 @@ void __ext4_warning(struct super_block *sb, const char *function,
vaf.va = &args;
printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
sb->s_id, function, line, &vaf);
+ notify_sb_warning(sb, function, line, 0, 0, fmt, &args);
va_end(args);
}
@@ -976,6 +989,7 @@ void __ext4_warning_inode(const struct inode *inode, const char *function,
printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
"inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
function, line, inode->i_ino, current->comm, &vaf);
+ notify_sb_warning(inode->i_sb, function, line, inode->i_ino, 0, fmt, &args);
va_end(args);
}
@@ -995,8 +1009,8 @@ __acquires(bitlock)
trace_ext4_error(sb, function, line);
__save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
+ va_start(args, fmt);
if (ext4_error_ratelimit(sb)) {
- va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
@@ -1007,8 +1021,9 @@ __acquires(bitlock)
printk(KERN_CONT "block %llu:",
(unsigned long long) block);
printk(KERN_CONT "%pV\n", &vaf);
- va_end(args);
}
+ notify_sb_error(sb, function, line, EFSCORRUPTED, ino, block, fmt, &args);
+ va_end(args);
if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
--
2.29.2
This sample demonstrates how to use the watch_sb syscall. It exposes
notifications like the following:
root@host:~# ./watch_sb /mnt
read() = 93
NOTIFY[000]: ty=000002 sy=01 i=0300005d
SB AT ext4_remount:5636 ERROR: 16 inode=0 block=0
description: Abort forced by user
read() = 96
NOTIFY[000]: ty=000002 sy=01 i=03000060
SB AT ext4_lookup:1706 ERROR: 0 inode=13 block=0
description: iget: bogus i_mode (45)
Signed-off-by: Gabriel Krisman Bertazi <[email protected]>
---
samples/watch_queue/Makefile | 2 +-
samples/watch_queue/watch_sb.c | 114 +++++++++++++++++++++++++++++++++
2 files changed, 115 insertions(+), 1 deletion(-)
create mode 100644 samples/watch_queue/watch_sb.c
diff --git a/samples/watch_queue/Makefile b/samples/watch_queue/Makefile
index c0db3a6bc524..6067d57a5bb1 100644
--- a/samples/watch_queue/Makefile
+++ b/samples/watch_queue/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-userprogs-always-y += watch_test
+userprogs-always-y += watch_test watch_sb
userccflags += -I usr/include
diff --git a/samples/watch_queue/watch_sb.c b/samples/watch_queue/watch_sb.c
new file mode 100644
index 000000000000..51b660334f6b
--- /dev/null
+++ b/samples/watch_queue/watch_sb.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Use watch_sb to watch for SB notifications.
+ *
+ * Copyright (C) 2020 Collabora Ltd.
+ * Written by Gabriel Krisman Bertazi <[email protected]>
+ * Based on watch_test.c by David Howells ([email protected])
+ */
+#define _GNU_SOURCE	/* for pipe2() */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <err.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/watch_queue.h>
+
+#ifndef __NR_watch_sb
+# define __NR_watch_sb 441
+#endif
+
+/*
+ * Drain the notification pipe @fd, printing every superblock event read.
+ * Returns on end-of-file; any read or framing error terminates the program.
+ */
+static void consumer(int fd)
+{
+	unsigned char buffer[433], *p, *end;
+	union {
+		struct watch_notification n;
+		unsigned char buf1[128 + 1];	/* +1: room for a forced NUL */
+		struct superblock_error_notification sen;
+		struct superblock_warning_notification swn;
+		struct superblock_msg_notification smn;
+	} n;
+	ssize_t buf_len;
+
+	for (;;) {
+		buf_len = read(fd, buffer, sizeof(buffer));
+		if (buf_len == -1)
+			err(1, "read");
+		if (buf_len == 0) {
+			printf("-- END --\n");
+			return;
+		}
+		/* Framing problems set no errno: errx(), and no "\n" in the text. */
+		if ((size_t)buf_len > sizeof(buffer))
+			errx(1, "Read buffer overrun: %zd", buf_len);
+		printf("read() = %zd\n", buf_len);
+
+		p = buffer;
+		end = buffer + buf_len;
+		while (p < end) {
+			size_t largest, len;
+
+			largest = end - p;
+			if (largest > 128)
+				largest = 128;
+			if (largest < sizeof(struct watch_notification))
+				errx(1, "Short message header: %zu", largest);
+			memcpy(&n, p, largest);
+
+			printf("NOTIFY[%03zx]: ty=%06x sy=%02x i=%08x\n",
+			       (size_t)(p - buffer), n.n.type, n.n.subtype, n.n.info);
+			len = n.n.info & WATCH_INFO_LENGTH;
+			if (len < sizeof(n.n) || len > largest)
+				errx(1, "Bad message length: %zu/%zu", len, largest);
+			/* Terminate desc[] so %s never runs past this record. */
+			n.buf1[len] = '\0';
+
+			switch (n.n.subtype) {
+			case NOTIFY_SUPERBLOCK_ERROR:
+				printf("\t SB AT %s:%d ERROR: %d inode=%llu block=%llu\n",
+				       n.sen.function, n.sen.line, n.sen.error_number,
+				       n.sen.inode, n.sen.block);
+				if (len > sizeof(n.sen))
+					printf("description: %s\n", n.sen.desc);
+				break;
+			case NOTIFY_SUPERBLOCK_MSG:
+				printf("\t Ext4 MSG: %s\n", n.smn.desc);
+				break;
+			case NOTIFY_SUPERBLOCK_WARNING:
+				printf("\t SB AT %s:%d WARNING inode=%llu block=%llu\n",
+				       n.swn.function, n.swn.line, n.swn.inode, n.swn.block);
+				if (len > sizeof(n.swn))
+					printf("description: %s\n", n.swn.desc);
+				break;
+			default:
+				printf("unknown subtype %d\n", n.n.subtype);
+			}
+			p += len;
+		}
+	}
+}
+
+/* Usage: watch_sb <mount point>. Watch that superblock and print its events. */
+int main(int argc, char **argv)
+{
+	int fd[2];
+
+	if (argc != 2)
+		errx(1, "Missing mount point");
+	/* pipe2() rather than a raw, x86-64-only syscall number. */
+	if (pipe2(fd, O_NOTIFICATION_PIPE) < 0)
+		err(1, "Failed to open pipe");
+	/* err()/errx() append their own newline, so messages carry none. */
+	if (ioctl(fd[0], IOC_WATCH_QUEUE_SET_SIZE, 256) < 0)
+		err(1, "ioctl fail");
+
+	if (syscall(__NR_watch_sb, 0, argv[1], NULL, fd[0], 0x3) < 0)
+		err(1, "Failed to watch SB");
+
+	consumer(fd[0]);
+	return 0;
+}
--
2.29.2