2020-10-30 10:11:01

by Daniel Vetter

[permalink] [raw]
Subject: [PATCH v5 13/15] resource: Move devmem revoke code to resource framework

We want all iomem mmaps to consistently revoke ptes when the kernel
takes over and CONFIG_IO_STRICT_DEVMEM is enabled. This includes the
pci bar mmaps available through procfs and sysfs, which currently do
not revoke mappings.

To prepare for this, move the code from the /dev/kmem driver to
kernel/resource.c.

Reviewed-by: Greg Kroah-Hartman <[email protected]>
Signed-off-by: Daniel Vetter <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Jérôme Glisse <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Arnd Bergmann <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: "Rafael J. Wysocki" <[email protected]>
Signed-off-by: Daniel Vetter <[email protected]>
--
v3:
- add barrier for consistency and document why we don't have to check
for NULL (Jason)
v4
- Adjust comments to reflect the general nature of this iomem revoke
code now (Dan)
---
drivers/char/mem.c | 85 +---------------------------------
include/linux/ioport.h | 6 +--
kernel/resource.c | 101 ++++++++++++++++++++++++++++++++++++++++-
3 files changed, 102 insertions(+), 90 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 7dcf9e4ea79d..43c871dc7477 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -31,9 +31,6 @@
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/security.h>
-#include <linux/pseudo_fs.h>
-#include <uapi/linux/magic.h>
-#include <linux/mount.h>

#ifdef CONFIG_IA64
# include <linux/efi.h>
@@ -836,42 +833,6 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
return ret;
}

-static struct inode *devmem_inode;
-
-#ifdef CONFIG_IO_STRICT_DEVMEM
-void revoke_devmem(struct resource *res)
-{
- /* pairs with smp_store_release() in devmem_init_inode() */
- struct inode *inode = smp_load_acquire(&devmem_inode);
-
- /*
- * Check that the initialization has completed. Losing the race
- * is ok because it means drivers are claiming resources before
- * the fs_initcall level of init and prevent /dev/mem from
- * establishing mappings.
- */
- if (!inode)
- return;
-
- /*
- * The expectation is that the driver has successfully marked
- * the resource busy by this point, so devmem_is_allowed()
- * should start returning false, however for performance this
- * does not iterate the entire resource range.
- */
- if (devmem_is_allowed(PHYS_PFN(res->start)) &&
- devmem_is_allowed(PHYS_PFN(res->end))) {
- /*
- * *cringe* iomem=relaxed says "go ahead, what's the
- * worst that can happen?"
- */
- return;
- }
-
- unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1);
-}
-#endif
-
static int open_port(struct inode *inode, struct file *filp)
{
int rc;
@@ -891,7 +852,7 @@ static int open_port(struct inode *inode, struct file *filp)
* revocations when drivers want to take over a /dev/mem mapped
* range.
*/
- filp->f_mapping = inode->i_mapping;
+ filp->f_mapping = iomem_get_mapping();

return 0;
}
@@ -1023,48 +984,6 @@ static char *mem_devnode(struct device *dev, umode_t *mode)

static struct class *mem_class;

-static int devmem_fs_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type devmem_fs_type = {
- .name = "devmem",
- .owner = THIS_MODULE,
- .init_fs_context = devmem_fs_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
-static int devmem_init_inode(void)
-{
- static struct vfsmount *devmem_vfs_mount;
- static int devmem_fs_cnt;
- struct inode *inode;
- int rc;
-
- rc = simple_pin_fs(&devmem_fs_type, &devmem_vfs_mount, &devmem_fs_cnt);
- if (rc < 0) {
- pr_err("Cannot mount /dev/mem pseudo filesystem: %d\n", rc);
- return rc;
- }
-
- inode = alloc_anon_inode(devmem_vfs_mount->mnt_sb);
- if (IS_ERR(inode)) {
- rc = PTR_ERR(inode);
- pr_err("Cannot allocate inode for /dev/mem: %d\n", rc);
- simple_release_fs(&devmem_vfs_mount, &devmem_fs_cnt);
- return rc;
- }
-
- /*
- * Publish /dev/mem initialized.
- * Pairs with smp_load_acquire() in revoke_devmem().
- */
- smp_store_release(&devmem_inode, inode);
-
- return 0;
-}
-
static int __init chr_dev_init(void)
{
int minor;
@@ -1086,8 +1005,6 @@ static int __init chr_dev_init(void)
*/
if ((minor == DEVPORT_MINOR) && !arch_has_dev_port())
continue;
- if ((minor == DEVMEM_MINOR) && devmem_init_inode() != 0)
- continue;

device_create(mem_class, NULL, MKDEV(MEM_MAJOR, minor),
NULL, devlist[minor].name);
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5135d4b86cd6..02a5466245c0 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -307,11 +307,7 @@ struct resource *devm_request_free_mem_region(struct device *dev,
struct resource *request_free_mem_region(struct resource *base,
unsigned long size, const char *name);

-#ifdef CONFIG_IO_STRICT_DEVMEM
-void revoke_devmem(struct resource *res);
-#else
-static inline void revoke_devmem(struct resource *res) { };
-#endif
+extern struct address_space *iomem_get_mapping(void);

#endif /* __ASSEMBLY__ */
#endif /* _LINUX_IOPORT_H */
diff --git a/kernel/resource.c b/kernel/resource.c
index 3ae2f56cc79d..5ecc3187fe2d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -18,12 +18,15 @@
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
+#include <linux/pseudo_fs.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/pfn.h>
#include <linux/mm.h>
+#include <linux/mount.h>
#include <linux/resource_ext.h>
+#include <uapi/linux/magic.h>
#include <asm/io.h>


@@ -1115,6 +1118,58 @@ resource_size_t resource_alignment(struct resource *res)

static DECLARE_WAIT_QUEUE_HEAD(muxed_resource_wait);

+static struct inode *iomem_inode;
+
+#ifdef CONFIG_IO_STRICT_DEVMEM
+static void revoke_iomem(struct resource *res)
+{
+ /* pairs with smp_store_release() in iomem_init_inode() */
+ struct inode *inode = smp_load_acquire(&iomem_inode);
+
+ /*
+ * Check that the initialization has completed. Losing the race
+ * is ok because it means drivers are claiming resources before
+ * the fs_initcall level of init and prevent iomem_get_mapping users
+ * from establishing mappings.
+ */
+ if (!inode)
+ return;
+
+ /*
+ * The expectation is that the driver has successfully marked
+ * the resource busy by this point, so devmem_is_allowed()
+ * should start returning false, however for performance this
+ * does not iterate the entire resource range.
+ */
+ if (devmem_is_allowed(PHYS_PFN(res->start)) &&
+ devmem_is_allowed(PHYS_PFN(res->end))) {
+ /*
+ * *cringe* iomem=relaxed says "go ahead, what's the
+ * worst that can happen?"
+ */
+ return;
+ }
+
+ unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1);
+}
+struct address_space *iomem_get_mapping(void)
+{
+ /*
+ * This function is only called from file open paths, hence guaranteed
+ * that fs_initcalls have completed and no need to check for NULL. But
+ * since revoke_iomem can be called before the initcall we still need
+ * the barrier to appease checkers.
+ */
+ return smp_load_acquire(&iomem_inode)->i_mapping;
+}
+#else
+static void revoke_iomem(struct resource *res) {}
+struct address_space *iomem_get_mapping(void)
+{
+ return NULL;
+}
+#endif
+
/**
* __request_region - create a new busy resource region
* @parent: parent resource descriptor
@@ -1182,7 +1237,7 @@ struct resource * __request_region(struct resource *parent,
write_unlock(&resource_lock);

if (res && orig_parent == &iomem_resource)
- revoke_devmem(res);
+ revoke_iomem(res);

return res;
}
@@ -1782,4 +1837,48 @@ static int __init strict_iomem(char *str)
return 1;
}

+static int iomem_fs_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type iomem_fs_type = {
+ .name = "iomem",
+ .owner = THIS_MODULE,
+ .init_fs_context = iomem_fs_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static int __init iomem_init_inode(void)
+{
+ static struct vfsmount *iomem_vfs_mount;
+ static int iomem_fs_cnt;
+ struct inode *inode;
+ int rc;
+
+ rc = simple_pin_fs(&iomem_fs_type, &iomem_vfs_mount, &iomem_fs_cnt);
+ if (rc < 0) {
+ pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc);
+ return rc;
+ }
+
+ inode = alloc_anon_inode(iomem_vfs_mount->mnt_sb);
+ if (IS_ERR(inode)) {
+ rc = PTR_ERR(inode);
+ pr_err("Cannot allocate inode for iomem: %d\n", rc);
+ simple_release_fs(&iomem_vfs_mount, &iomem_fs_cnt);
+ return rc;
+ }
+
+ /*
+ * Publish iomem revocation inode initialized.
+ * Pairs with smp_load_acquire() in revoke_iomem().
+ */
+ smp_store_release(&iomem_inode, inode);
+
+ return 0;
+}
+
+fs_initcall(iomem_init_inode);
+
__setup("iomem=", strict_iomem);
--
2.28.0


2020-10-31 06:42:22

by John Hubbard

[permalink] [raw]
Subject: Re: [PATCH v5 13/15] resource: Move devmem revoke code to resource framework

On 10/30/20 3:08 AM, Daniel Vetter wrote:
> We want all iomem mmaps to consistently revoke ptes when the kernel
> takes over and CONFIG_IO_STRICT_DEVMEM is enabled. This includes the
> pci bar mmaps available through procfs and sysfs, which currently do
> not revoke mappings.
>
> To prepare for this, move the code from the /dev/kmem driver to
> kernel/resource.c.

This seems like it's doing a lot more than just code movement, right?
Should we list some of that here?

Also, I'm seeing a crash due to this commit. More below:

>
> Reviewed-by: Greg Kroah-Hartman <[email protected]>
> Signed-off-by: Daniel Vetter <[email protected]>
> Cc: Jason Gunthorpe <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Dan Williams <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: John Hubbard <[email protected]>
> Cc: Jérôme Glisse <[email protected]>
> Cc: Jan Kara <[email protected]>
> Cc: Dan Williams <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: Arnd Bergmann <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Cc: David Hildenbrand <[email protected]>
> Cc: "Rafael J. Wysocki" <[email protected]>
> Signed-off-by: Daniel Vetter <[email protected]>
> --
> v3:
> - add barrier for consistency and document why we don't have to check
> for NULL (Jason)
> v4
> - Adjust comments to reflect the general nature of this iomem revoke
> code now (Dan)
> ---
> drivers/char/mem.c | 85 +---------------------------------
> include/linux/ioport.h | 6 +--
> kernel/resource.c | 101 ++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 102 insertions(+), 90 deletions(-)
>
> diff --git a/drivers/char/mem.c b/drivers/char/mem.c
> index 7dcf9e4ea79d..43c871dc7477 100644
> --- a/drivers/char/mem.c
> +++ b/drivers/char/mem.c
> @@ -31,9 +31,6 @@
> #include <linux/uio.h>
> #include <linux/uaccess.h>
> #include <linux/security.h>
> -#include <linux/pseudo_fs.h>
> -#include <uapi/linux/magic.h>
> -#include <linux/mount.h>
>
> #ifdef CONFIG_IA64
> # include <linux/efi.h>
> @@ -836,42 +833,6 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
> return ret;
> }
>
> -static struct inode *devmem_inode;
> -
> -#ifdef CONFIG_IO_STRICT_DEVMEM
> -void revoke_devmem(struct resource *res)
> -{
> - /* pairs with smp_store_release() in devmem_init_inode() */
> - struct inode *inode = smp_load_acquire(&devmem_inode);
> -
> - /*
> - * Check that the initialization has completed. Losing the race
> - * is ok because it means drivers are claiming resources before
> - * the fs_initcall level of init and prevent /dev/mem from
> - * establishing mappings.
> - */
> - if (!inode)
> - return;
> -
> - /*
> - * The expectation is that the driver has successfully marked
> - * the resource busy by this point, so devmem_is_allowed()
> - * should start returning false, however for performance this
> - * does not iterate the entire resource range.
> - */
> - if (devmem_is_allowed(PHYS_PFN(res->start)) &&
> - devmem_is_allowed(PHYS_PFN(res->end))) {
> - /*
> - * *cringe* iomem=relaxed says "go ahead, what's the
> - * worst that can happen?"
> - */
> - return;
> - }
> -
> - unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1);
> -}
> -#endif
> -
> static int open_port(struct inode *inode, struct file *filp)
> {
> int rc;
> @@ -891,7 +852,7 @@ static int open_port(struct inode *inode, struct file *filp)
> * revocations when drivers want to take over a /dev/mem mapped
> * range.
> */
> - filp->f_mapping = inode->i_mapping;
> + filp->f_mapping = iomem_get_mapping();


The problem is that iomem_get_mapping() returns NULL for the !CONFIG_IO_STRICT_DEVMEM
case. And then we have pre-existing fs code that expects to go "up and over", like this:


static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *))
{
...

file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

...and it crashes on that line fairly early in bootup.

Not sure what to suggest for this patch, but wanted to get this report out at least.

thanks,
--
John Hubbard
NVIDIA

>
> return 0;
> }
> @@ -1023,48 +984,6 @@ static char *mem_devnode(struct device *dev, umode_t *mode)
>
> static struct class *mem_class;
>
> -static int devmem_fs_init_fs_context(struct fs_context *fc)
> -{
> - return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
> -}
> -
> -static struct file_system_type devmem_fs_type = {
> - .name = "devmem",
> - .owner = THIS_MODULE,
> - .init_fs_context = devmem_fs_init_fs_context,
> - .kill_sb = kill_anon_super,
> -};
> -
> -static int devmem_init_inode(void)
> -{
> - static struct vfsmount *devmem_vfs_mount;
> - static int devmem_fs_cnt;
> - struct inode *inode;
> - int rc;
> -
> - rc = simple_pin_fs(&devmem_fs_type, &devmem_vfs_mount, &devmem_fs_cnt);
> - if (rc < 0) {
> - pr_err("Cannot mount /dev/mem pseudo filesystem: %d\n", rc);
> - return rc;
> - }
> -
> - inode = alloc_anon_inode(devmem_vfs_mount->mnt_sb);
> - if (IS_ERR(inode)) {
> - rc = PTR_ERR(inode);
> - pr_err("Cannot allocate inode for /dev/mem: %d\n", rc);
> - simple_release_fs(&devmem_vfs_mount, &devmem_fs_cnt);
> - return rc;
> - }
> -
> - /*
> - * Publish /dev/mem initialized.
> - * Pairs with smp_load_acquire() in revoke_devmem().
> - */
> - smp_store_release(&devmem_inode, inode);
> -
> - return 0;
> -}
> -
> static int __init chr_dev_init(void)
> {
> int minor;
> @@ -1086,8 +1005,6 @@ static int __init chr_dev_init(void)
> */
> if ((minor == DEVPORT_MINOR) && !arch_has_dev_port())
> continue;
> - if ((minor == DEVMEM_MINOR) && devmem_init_inode() != 0)
> - continue;
>
> device_create(mem_class, NULL, MKDEV(MEM_MAJOR, minor),
> NULL, devlist[minor].name);
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index 5135d4b86cd6..02a5466245c0 100644
> --- a/include/linux/ioport.h
> +++ b/include/linux/ioport.h
> @@ -307,11 +307,7 @@ struct resource *devm_request_free_mem_region(struct device *dev,
> struct resource *request_free_mem_region(struct resource *base,
> unsigned long size, const char *name);
>
> -#ifdef CONFIG_IO_STRICT_DEVMEM
> -void revoke_devmem(struct resource *res);
> -#else
> -static inline void revoke_devmem(struct resource *res) { };
> -#endif
> +extern struct address_space *iomem_get_mapping(void);
>
> #endif /* __ASSEMBLY__ */
> #endif /* _LINUX_IOPORT_H */
> diff --git a/kernel/resource.c b/kernel/resource.c
> index 3ae2f56cc79d..5ecc3187fe2d 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -18,12 +18,15 @@
> #include <linux/spinlock.h>
> #include <linux/fs.h>
> #include <linux/proc_fs.h>
> +#include <linux/pseudo_fs.h>
> #include <linux/sched.h>
> #include <linux/seq_file.h>
> #include <linux/device.h>
> #include <linux/pfn.h>
> #include <linux/mm.h>
> +#include <linux/mount.h>
> #include <linux/resource_ext.h>
> +#include <uapi/linux/magic.h>
> #include <asm/io.h>
>
>
> @@ -1115,6 +1118,58 @@ resource_size_t resource_alignment(struct resource *res)
>
> static DECLARE_WAIT_QUEUE_HEAD(muxed_resource_wait);
>
> +static struct inode *iomem_inode;
> +
> +#ifdef CONFIG_IO_STRICT_DEVMEM
> +static void revoke_iomem(struct resource *res)
> +{
> + /* pairs with smp_store_release() in iomem_init_inode() */
> + struct inode *inode = smp_load_acquire(&iomem_inode);
> +
> + /*
> + * Check that the initialization has completed. Losing the race
> + * is ok because it means drivers are claiming resources before
> + * the fs_initcall level of init and prevent iomem_get_mapping users
> + * from establishing mappings.
> + */
> + if (!inode)
> + return;
> +
> + /*
> + * The expectation is that the driver has successfully marked
> + * the resource busy by this point, so devmem_is_allowed()
> + * should start returning false, however for performance this
> + * does not iterate the entire resource range.
> + */
> + if (devmem_is_allowed(PHYS_PFN(res->start)) &&
> + devmem_is_allowed(PHYS_PFN(res->end))) {
> + /*
> + * *cringe* iomem=relaxed says "go ahead, what's the
> + * worst that can happen?"
> + */
> + return;
> + }
> +
> + unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1);
> +}
> +struct address_space *iomem_get_mapping(void)
> +{
> + /*
> + * This function is only called from file open paths, hence guaranteed
> + * that fs_initcalls have completed and no need to check for NULL. But
> + * since revoke_iomem can be called before the initcall we still need
> + * the barrier to appease checkers.
> + */
> + return smp_load_acquire(&iomem_inode)->i_mapping;
> +}
> +#else
> +static void revoke_iomem(struct resource *res) {}
> +struct address_space *iomem_get_mapping(void)
> +{
> + return NULL;
> +}
> +#endif
> +
> /**
> * __request_region - create a new busy resource region
> * @parent: parent resource descriptor
> @@ -1182,7 +1237,7 @@ struct resource * __request_region(struct resource *parent,
> write_unlock(&resource_lock);
>
> if (res && orig_parent == &iomem_resource)
> - revoke_devmem(res);
> + revoke_iomem(res);
>
> return res;
> }
> @@ -1782,4 +1837,48 @@ static int __init strict_iomem(char *str)
> return 1;
> }
>
> +static int iomem_fs_init_fs_context(struct fs_context *fc)
> +{
> + return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
> +}
> +
> +static struct file_system_type iomem_fs_type = {
> + .name = "iomem",
> + .owner = THIS_MODULE,
> + .init_fs_context = iomem_fs_init_fs_context,
> + .kill_sb = kill_anon_super,
> +};
> +
> +static int __init iomem_init_inode(void)
> +{
> + static struct vfsmount *iomem_vfs_mount;
> + static int iomem_fs_cnt;
> + struct inode *inode;
> + int rc;
> +
> + rc = simple_pin_fs(&iomem_fs_type, &iomem_vfs_mount, &iomem_fs_cnt);
> + if (rc < 0) {
> + pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc);
> + return rc;
> + }
> +
> + inode = alloc_anon_inode(iomem_vfs_mount->mnt_sb);
> + if (IS_ERR(inode)) {
> + rc = PTR_ERR(inode);
> + pr_err("Cannot allocate inode for iomem: %d\n", rc);
> + simple_release_fs(&iomem_vfs_mount, &iomem_fs_cnt);
> + return rc;
> + }
> +
> + /*
> + * Publish iomem revocation inode initialized.
> + * Pairs with smp_load_acquire() in revoke_iomem().
> + */
> + smp_store_release(&iomem_inode, inode);
> +
> + return 0;
> +}
> +
> +fs_initcall(iomem_init_inode);
> +
> __setup("iomem=", strict_iomem);
>


2020-10-31 14:42:40

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH v5 13/15] resource: Move devmem revoke code to resource framework

On Sat, Oct 31, 2020 at 7:36 AM John Hubbard <[email protected]> wrote:
>
> On 10/30/20 3:08 AM, Daniel Vetter wrote:
> > We want all iomem mmaps to consistently revoke ptes when the kernel
> > takes over and CONFIG_IO_STRICT_DEVMEM is enabled. This includes the
> > pci bar mmaps available through procfs and sysfs, which currently do
> > not revoke mappings.
> >
> > To prepare for this, move the code from the /dev/kmem driver to
> > kernel/resource.c.
>
> This seems like it's doing a lot more than just code movement, right?
> Should we list some of that here?

It was meant to be just moving code, but then the inevitable bikeshed
showed up and I forgot to update the commit message properly. Will fix
that.

> Also, I'm seeing a crash due to this commit. More below:

Uh that's not good.
>
> >
> > Reviewed-by: Greg Kroah-Hartman <[email protected]>
> > Signed-off-by: Daniel Vetter <[email protected]>
> > Cc: Jason Gunthorpe <[email protected]>
> > Cc: Kees Cook <[email protected]>
> > Cc: Dan Williams <[email protected]>
> > Cc: Andrew Morton <[email protected]>
> > Cc: John Hubbard <[email protected]>
> > Cc: Jérôme Glisse <[email protected]>
> > Cc: Jan Kara <[email protected]>
> > Cc: Dan Williams <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Cc: [email protected]
> > Cc: [email protected]
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: Greg Kroah-Hartman <[email protected]>
> > Cc: Daniel Vetter <[email protected]>
> > Cc: David Hildenbrand <[email protected]>
> > Cc: "Rafael J. Wysocki" <[email protected]>
> > Signed-off-by: Daniel Vetter <[email protected]>
> > --
> > v3:
> > - add barrier for consistency and document why we don't have to check
> > for NULL (Jason)
> > v4
> > - Adjust comments to reflect the general nature of this iomem revoke
> > code now (Dan)
> > ---
> > drivers/char/mem.c | 85 +---------------------------------
> > include/linux/ioport.h | 6 +--
> > kernel/resource.c | 101 ++++++++++++++++++++++++++++++++++++++++-
> > 3 files changed, 102 insertions(+), 90 deletions(-)
> >
> > diff --git a/drivers/char/mem.c b/drivers/char/mem.c
> > index 7dcf9e4ea79d..43c871dc7477 100644
> > --- a/drivers/char/mem.c
> > +++ b/drivers/char/mem.c
> > @@ -31,9 +31,6 @@
> > #include <linux/uio.h>
> > #include <linux/uaccess.h>
> > #include <linux/security.h>
> > -#include <linux/pseudo_fs.h>
> > -#include <uapi/linux/magic.h>
> > -#include <linux/mount.h>
> >
> > #ifdef CONFIG_IA64
> > # include <linux/efi.h>
> > @@ -836,42 +833,6 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
> > return ret;
> > }
> >
> > -static struct inode *devmem_inode;
> > -
> > -#ifdef CONFIG_IO_STRICT_DEVMEM
> > -void revoke_devmem(struct resource *res)
> > -{
> > - /* pairs with smp_store_release() in devmem_init_inode() */
> > - struct inode *inode = smp_load_acquire(&devmem_inode);
> > -
> > - /*
> > - * Check that the initialization has completed. Losing the race
> > - * is ok because it means drivers are claiming resources before
> > - * the fs_initcall level of init and prevent /dev/mem from
> > - * establishing mappings.
> > - */
> > - if (!inode)
> > - return;
> > -
> > - /*
> > - * The expectation is that the driver has successfully marked
> > - * the resource busy by this point, so devmem_is_allowed()
> > - * should start returning false, however for performance this
> > - * does not iterate the entire resource range.
> > - */
> > - if (devmem_is_allowed(PHYS_PFN(res->start)) &&
> > - devmem_is_allowed(PHYS_PFN(res->end))) {
> > - /*
> > - * *cringe* iomem=relaxed says "go ahead, what's the
> > - * worst that can happen?"
> > - */
> > - return;
> > - }
> > -
> > - unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1);
> > -}
> > -#endif
> > -
> > static int open_port(struct inode *inode, struct file *filp)
> > {
> > int rc;
> > @@ -891,7 +852,7 @@ static int open_port(struct inode *inode, struct file *filp)
> > * revocations when drivers want to take over a /dev/mem mapped
> > * range.
> > */
> > - filp->f_mapping = inode->i_mapping;
> > + filp->f_mapping = iomem_get_mapping();
>
>
> The problem is that iomem_get_mapping() returns NULL for the !CONFIG_IO_STRICT_DEVMEM
> case. And then we have pre-existing fs code that expects to go "up and over", like this:
>
>
> static int do_dentry_open(struct file *f,
> struct inode *inode,
> int (*open)(struct inode *, struct file *))
> {
> ...
>
> file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
>
> ...and it crashes on that line fairly early in bootup.
>
> Not sure what to suggest for this patch, but wanted to get this report out at least.

Old code seems to have worked by always setting up the inode (we still
do that) and always setting it (we don't do that anymore), just not
revoking the ptes when the Kconfig is not set. I'll fix that up and
remove the behaviour change here.
-Daniel

> thanks,
> --
> John Hubbard
> NVIDIA
>
> >
> > return 0;
> > }
> > @@ -1023,48 +984,6 @@ static char *mem_devnode(struct device *dev, umode_t *mode)
> >
> > static struct class *mem_class;
> >
> > -static int devmem_fs_init_fs_context(struct fs_context *fc)
> > -{
> > - return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
> > -}
> > -
> > -static struct file_system_type devmem_fs_type = {
> > - .name = "devmem",
> > - .owner = THIS_MODULE,
> > - .init_fs_context = devmem_fs_init_fs_context,
> > - .kill_sb = kill_anon_super,
> > -};
> > -
> > -static int devmem_init_inode(void)
> > -{
> > - static struct vfsmount *devmem_vfs_mount;
> > - static int devmem_fs_cnt;
> > - struct inode *inode;
> > - int rc;
> > -
> > - rc = simple_pin_fs(&devmem_fs_type, &devmem_vfs_mount, &devmem_fs_cnt);
> > - if (rc < 0) {
> > - pr_err("Cannot mount /dev/mem pseudo filesystem: %d\n", rc);
> > - return rc;
> > - }
> > -
> > - inode = alloc_anon_inode(devmem_vfs_mount->mnt_sb);
> > - if (IS_ERR(inode)) {
> > - rc = PTR_ERR(inode);
> > - pr_err("Cannot allocate inode for /dev/mem: %d\n", rc);
> > - simple_release_fs(&devmem_vfs_mount, &devmem_fs_cnt);
> > - return rc;
> > - }
> > -
> > - /*
> > - * Publish /dev/mem initialized.
> > - * Pairs with smp_load_acquire() in revoke_devmem().
> > - */
> > - smp_store_release(&devmem_inode, inode);
> > -
> > - return 0;
> > -}
> > -
> > static int __init chr_dev_init(void)
> > {
> > int minor;
> > @@ -1086,8 +1005,6 @@ static int __init chr_dev_init(void)
> > */
> > if ((minor == DEVPORT_MINOR) && !arch_has_dev_port())
> > continue;
> > - if ((minor == DEVMEM_MINOR) && devmem_init_inode() != 0)
> > - continue;
> >
> > device_create(mem_class, NULL, MKDEV(MEM_MAJOR, minor),
> > NULL, devlist[minor].name);
> > diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> > index 5135d4b86cd6..02a5466245c0 100644
> > --- a/include/linux/ioport.h
> > +++ b/include/linux/ioport.h
> > @@ -307,11 +307,7 @@ struct resource *devm_request_free_mem_region(struct device *dev,
> > struct resource *request_free_mem_region(struct resource *base,
> > unsigned long size, const char *name);
> >
> > -#ifdef CONFIG_IO_STRICT_DEVMEM
> > -void revoke_devmem(struct resource *res);
> > -#else
> > -static inline void revoke_devmem(struct resource *res) { };
> > -#endif
> > +extern struct address_space *iomem_get_mapping(void);
> >
> > #endif /* __ASSEMBLY__ */
> > #endif /* _LINUX_IOPORT_H */
> > diff --git a/kernel/resource.c b/kernel/resource.c
> > index 3ae2f56cc79d..5ecc3187fe2d 100644
> > --- a/kernel/resource.c
> > +++ b/kernel/resource.c
> > @@ -18,12 +18,15 @@
> > #include <linux/spinlock.h>
> > #include <linux/fs.h>
> > #include <linux/proc_fs.h>
> > +#include <linux/pseudo_fs.h>
> > #include <linux/sched.h>
> > #include <linux/seq_file.h>
> > #include <linux/device.h>
> > #include <linux/pfn.h>
> > #include <linux/mm.h>
> > +#include <linux/mount.h>
> > #include <linux/resource_ext.h>
> > +#include <uapi/linux/magic.h>
> > #include <asm/io.h>
> >
> >
> > @@ -1115,6 +1118,58 @@ resource_size_t resource_alignment(struct resource *res)
> >
> > static DECLARE_WAIT_QUEUE_HEAD(muxed_resource_wait);
> >
> > +static struct inode *iomem_inode;
> > +
> > +#ifdef CONFIG_IO_STRICT_DEVMEM
> > +static void revoke_iomem(struct resource *res)
> > +{
> > + /* pairs with smp_store_release() in iomem_init_inode() */
> > + struct inode *inode = smp_load_acquire(&iomem_inode);
> > +
> > + /*
> > + * Check that the initialization has completed. Losing the race
> > + * is ok because it means drivers are claiming resources before
> > + * the fs_initcall level of init and prevent iomem_get_mapping users
> > + * from establishing mappings.
> > + */
> > + if (!inode)
> > + return;
> > +
> > + /*
> > + * The expectation is that the driver has successfully marked
> > + * the resource busy by this point, so devmem_is_allowed()
> > + * should start returning false, however for performance this
> > + * does not iterate the entire resource range.
> > + */
> > + if (devmem_is_allowed(PHYS_PFN(res->start)) &&
> > + devmem_is_allowed(PHYS_PFN(res->end))) {
> > + /*
> > + * *cringe* iomem=relaxed says "go ahead, what's the
> > + * worst that can happen?"
> > + */
> > + return;
> > + }
> > +
> > + unmap_mapping_range(inode->i_mapping, res->start, resource_size(res), 1);
> > +}
> > +struct address_space *iomem_get_mapping(void)
> > +{
> > + /*
> > + * This function is only called from file open paths, hence guaranteed
> > + * that fs_initcalls have completed and no need to check for NULL. But
> > + * since revoke_iomem can be called before the initcall we still need
> > + * the barrier to appease checkers.
> > + */
> > + return smp_load_acquire(&iomem_inode)->i_mapping;
> > +}
> > +#else
> > +static void revoke_iomem(struct resource *res) {}
> > +struct address_space *iomem_get_mapping(void)
> > +{
> > + return NULL;
> > +}
> > +#endif
> > +
> > /**
> > * __request_region - create a new busy resource region
> > * @parent: parent resource descriptor
> > @@ -1182,7 +1237,7 @@ struct resource * __request_region(struct resource *parent,
> > write_unlock(&resource_lock);
> >
> > if (res && orig_parent == &iomem_resource)
> > - revoke_devmem(res);
> > + revoke_iomem(res);
> >
> > return res;
> > }
> > @@ -1782,4 +1837,48 @@ static int __init strict_iomem(char *str)
> > return 1;
> > }
> >
> > +static int iomem_fs_init_fs_context(struct fs_context *fc)
> > +{
> > + return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
> > +}
> > +
> > +static struct file_system_type iomem_fs_type = {
> > + .name = "iomem",
> > + .owner = THIS_MODULE,
> > + .init_fs_context = iomem_fs_init_fs_context,
> > + .kill_sb = kill_anon_super,
> > +};
> > +
> > +static int __init iomem_init_inode(void)
> > +{
> > + static struct vfsmount *iomem_vfs_mount;
> > + static int iomem_fs_cnt;
> > + struct inode *inode;
> > + int rc;
> > +
> > + rc = simple_pin_fs(&iomem_fs_type, &iomem_vfs_mount, &iomem_fs_cnt);
> > + if (rc < 0) {
> > + pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc);
> > + return rc;
> > + }
> > +
> > + inode = alloc_anon_inode(iomem_vfs_mount->mnt_sb);
> > + if (IS_ERR(inode)) {
> > + rc = PTR_ERR(inode);
> > + pr_err("Cannot allocate inode for iomem: %d\n", rc);
> > + simple_release_fs(&iomem_vfs_mount, &iomem_fs_cnt);
> > + return rc;
> > + }
> > +
> > + /*
> > + * Publish iomem revocation inode initialized.
> > + * Pairs with smp_load_acquire() in revoke_iomem().
> > + */
> > + smp_store_release(&iomem_inode, inode);
> > +
> > + return 0;
> > +}
> > +
> > +fs_initcall(iomem_init_inode);
> > +
> > __setup("iomem=", strict_iomem);
> >
>
>


--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2020-11-03 05:56:50

by kernel test robot

[permalink] [raw]
Subject: [resource] 22b17dc667: Kernel panic - not syncing: Fatal exception

Greeting,

FYI, we noticed the following commit (built with gcc-9):

commit: 22b17dc667d36418ccabb9c668c4b489185fb40a ("[PATCH v5 13/15] resource: Move devmem revoke code to resource framework")
url: https://github.com/0day-ci/linux/commits/Daniel-Vetter/follow_pfn-and-other-iomap-races/20201030-181112
base: git://linuxtv.org/media_tree.git master

in testcase: fsmark
version: fsmark-x86_64-3.3-1_20201007
with following parameters:

iterations: 1x
nr_threads: 1t
disk: 1BRD_48G
fs: f2fs
fs2: nfsv4
filesize: 4M
test_size: 40G
sync_method: NoSync
cpufreq_governor: performance
ucode: 0x5002f01

test-description: The fsmark is a file system benchmark to test synchronous write workloads, for example, mail servers workload.
test-url: https://sourceforge.net/projects/fsmark/


on test machine: 192 threads Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz with 192G memory

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>


[ 28.644165] systemd[1]: RTC configured in localtime, applying delta of 0 minutes to system time.

[ 28.699473] #PF: supervisor read access in kernel mode
[ 28.704611] #PF: error_code(0x0000) - not-present page
[ 28.709749] PGD 0 P4D 0
[ 28.712291] Oops: 0000 [#1] SMP NOPTI
[ 28.715956] CPU: 0 PID: 1 Comm: systemd Not tainted 5.10.0-rc1-00015-g22b17dc667d3 #1
[ 28.723793] RIP: 0010:do_dentry_open+0x1c9/0x360
[ 28.728410] Code: 84 82 01 00 00 81 ca 00 00 04 00 89 53 44 48 8b 83 f0 00 00 00 81 63 40 3f fc ff ff 48 8d bb 98 00 00 00 c7 43 34 00 00 00 00 <48> 8b 00 48 8b 70 30 e8 2b cb f4 ff f6 43 41 40 74 5a 48 8b 83 f0
[ 28.747157] RSP: 0018:ffffc9000006fcc8 EFLAGS: 00010206
[ 28.752380] RAX: 0000000000000000 RBX: ffff8881502ad400 RCX: 0000000000000000
[ 28.759506] RDX: 00000000000a201d RSI: ffffffff8284d260 RDI: ffff8881502ad498
[ 28.766639] RBP: ffff88a485a06490 R08: 0000000000000000 R09: ffffffff8284d260
[ 28.773769] R10: ffffc9000006fcc8 R11: 756c6176006d656d R12: 0000000000000000
[ 28.780895] R13: ffffffff8133ddc0 R14: ffff8881502ad410 R15: ffff8881502ad400
[ 28.788028] FS: 00007ff54afa1940(0000) GS:ffff888c4f600000(0000) knlGS:0000000000000000
[ 28.796113] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 28.801858] CR2: 0000000000000000 CR3: 0000000100120003 CR4: 00000000007706f0
[ 28.808983] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 28.816114] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 28.823239] PKRU: 55555554
[ 28.825952] Call Trace:
[ 28.828412] path_openat+0xaa8/0x10a0
[ 28.832073] do_filp_open+0x91/0x100
[ 28.835653] ? acpi_os_wait_semaphore+0x48/0x80
[ 28.840186] ? __check_object_size+0x136/0x160
[ 28.844631] do_sys_openat2+0x20d/0x2e0
[ 28.848470] do_sys_open+0x44/0x80
[ 28.851878] do_syscall_64+0x33/0x40
[ 28.855457] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 28.860509] RIP: 0033:0x7ff54c1521ae
[ 28.864086] Code: 25 00 00 41 00 3d 00 00 41 00 74 48 48 8d 05 59 65 0d 00 8b 00 85 c0 75 69 89 f2 b8 01 01 00 00 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 0f 87 a6 00 00 00 48 8b 4c 24 28 64 48 33 0c 25
[ 28.882833] RSP: 002b:00007ffd1c9586d0 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
[ 28.890399] RAX: ffffffffffffffda RBX: 00007ffd1c9587d0 RCX: 00007ff54c1521ae
[ 28.897531] RDX: 0000000000080000 RSI: 00007ff54bfa0e5a RDI: 00000000ffffff9c
[ 28.904662] RBP: 00007ffd1c9587d8 R08: 000000000000021f R09: 000055f837cf4290
[ 28.911796] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000056dd9000
[ 28.918927] R13: 00000000ffffffff R14: 00007ffd1c9587d0 R15: 0000000000000002
[ 28.926060] Modules linked in: ip_tables
[ 28.929986] CR2: 0000000000000000
mDebian GNU/Linu
[ 28.933416] ---[ end trace 94e4f9aa3df66098 ]---
[ 28.939355] RIP: 0010:do_dentry_open+0x1c9/0x360
[ 28.943975] Code: 84 82 01 00 00 81 ca 00 00 04 00 89 53 44 48 8b 83 f0 00 00 00 81 63 40 3f fc ff ff 48 8d bb 98 00 00 00 c7 43 34 00 00 00 00 <48> 8b 00 48 8b 70 30 e8 2b cb f4 ff f6 43 41 40 74 5a 48 8b 83 f0
[ 28.962721] RSP: 0018:ffffc9000006fcc8 EFLAGS: 00010206
[ 28.967948] RAX: 0000000000000000 RBX: ffff8881502ad400 RCX: 0000000000000000
[ 28.975079] RDX: 00000000000a201d RSI: ffffffff8284d260 RDI: ffff8881502ad498
[ 28.982211] RBP: ffff88a485a06490 R08: 0000000000000000 R09: ffffffff8284d260
[ 28.989337] R10: ffffc9000006fcc8 R11: 756c6176006d656d R12: 0000000000000000
[ 28.996467] R13: ffffffff8133ddc0 R14: ffff8881502ad410 R15: ffff8881502ad400
[ 29.003592] FS: 00007ff54afa1940(0000) GS:ffff888c4f600000(0000) knlGS:0000000000000000
[ 29.011668] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 29.017409] CR2: 0000000000000000 CR3: 0000000100120003 CR4: 00000000007706f0
[ 29.024539] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 29.031671] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 29.038804] PKRU: 55555554
[ 29.041508] Kernel panic - not syncing: Fatal exception
ACPI MEMORY or I/O RESET_REG.


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml



Thanks,
[email protected]


Attachments:
(No filename) (5.35 kB)
config-5.10.0-rc1-00015-g22b17dc667d3 (174.22 kB)
job-script (8.29 kB)
dmesg.xz (26.48 kB)
job.yaml (5.55 kB)
Download all attachments

2020-11-03 06:17:22

by John Hubbard

[permalink] [raw]
Subject: Re: [resource] 22b17dc667: Kernel panic - not syncing: Fatal exception

On 11/2/20 10:06 PM, lkp wrote:
> Greeting,
>
> FYI, we noticed the following commit (built with gcc-9):
>
> commit: 22b17dc667d36418ccabb9c668c4b489185fb40a ("[PATCH v5 13/15] resource: Move devmem revoke code to resource framework")
> url: https://github.com/0day-ci/linux/commits/Daniel-Vetter/follow_pfn-and-other-iomap-races/20201030-181112
> base: git://linuxtv.org/media_tree.git master
>
> in testcase: fsmark
> version: fsmark-x86_64-3.3-1_20201007
> with following parameters:
>
> iterations: 1x
> nr_threads: 1t
> disk: 1BRD_48G
> fs: f2fs
> fs2: nfsv4
> filesize: 4M
> test_size: 40G
> sync_method: NoSync
> cpufreq_governor: performance
> ucode: 0x5002f01
>
> test-description: The fsmark is a file system benchmark to test synchronous write workloads, for example, mail servers workload.
> test-url: https://sourceforge.net/projects/fsmark/
>
>
> on test machine: 192 threads Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz with 192G memory
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>

Yep, this is the same crash that I saw. And the .config also has

# CONFIG_IO_STRICT_DEVMEM is not set

so it all makes sense.


> If you fix the issue, kindly add following tag
> Reported-by: kernel test robot <[email protected]>
>
>
> [ 28.644165] systemd[1]: RTC configured in localtime, applying delta of 0 minutes to system time.
>
> [ 28.699473] #PF: supervisor read access in kernel mode
> [ 28.704611] #PF: error_code(0x0000) - not-present page
> [ 28.709749] PGD 0 P4D 0
> [ 28.712291] Oops: 0000 [#1] SMP NOPTI
> [ 28.715956] CPU: 0 PID: 1 Comm: systemd Not tainted 5.10.0-rc1-00015-g22b17dc667d3 #1
> [ 28.723793] RIP: 0010:do_dentry_open+0x1c9/0x360
> [ 28.728410] Code: 84 82 01 00 00 81 ca 00 00 04 00 89 53 44 48 8b 83 f0 00 00 00 81 63 40 3f fc ff ff 48 8d bb 98 00 00 00 c7 43 34 00 00 00 00 <48> 8b 00 48 8b 70 30 e8 2b cb f4 ff f6 43 41 40 74 5a 48 8b 83 f0
> [ 28.747157] RSP: 0018:ffffc9000006fcc8 EFLAGS: 00010206
> [ 28.752380] RAX: 0000000000000000 RBX: ffff8881502ad400 RCX: 0000000000000000
> [ 28.759506] RDX: 00000000000a201d RSI: ffffffff8284d260 RDI: ffff8881502ad498
> [ 28.766639] RBP: ffff88a485a06490 R08: 0000000000000000 R09: ffffffff8284d260
> [ 28.773769] R10: ffffc9000006fcc8 R11: 756c6176006d656d R12: 0000000000000000
> [ 28.780895] R13: ffffffff8133ddc0 R14: ffff8881502ad410 R15: ffff8881502ad400
> [ 28.788028] FS: 00007ff54afa1940(0000) GS:ffff888c4f600000(0000) knlGS:0000000000000000
> [ 28.796113] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 28.801858] CR2: 0000000000000000 CR3: 0000000100120003 CR4: 00000000007706f0
> [ 28.808983] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 28.816114] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 28.823239] PKRU: 55555554
> [ 28.825952] Call Trace:
> [ 28.828412] path_openat+0xaa8/0x10a0
> [ 28.832073] do_filp_open+0x91/0x100
> [ 28.835653] ? acpi_os_wait_semaphore+0x48/0x80
> [ 28.840186] ? __check_object_size+0x136/0x160
> [ 28.844631] do_sys_openat2+0x20d/0x2e0
> [ 28.848470] do_sys_open+0x44/0x80
> [ 28.851878] do_syscall_64+0x33/0x40
> [ 28.855457] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [ 28.860509] RIP: 0033:0x7ff54c1521ae
> [ 28.864086] Code: 25 00 00 41 00 3d 00 00 41 00 74 48 48 8d 05 59 65 0d 00 8b 00 85 c0 75 69 89 f2 b8 01 01 00 00 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 0f 87 a6 00 00 00 48 8b 4c 24 28 64 48 33 0c 25
> [ 28.882833] RSP: 002b:00007ffd1c9586d0 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
> [ 28.890399] RAX: ffffffffffffffda RBX: 00007ffd1c9587d0 RCX: 00007ff54c1521ae
> [ 28.897531] RDX: 0000000000080000 RSI: 00007ff54bfa0e5a RDI: 00000000ffffff9c
> [ 28.904662] RBP: 00007ffd1c9587d8 R08: 000000000000021f R09: 000055f837cf4290
> [ 28.911796] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000056dd9000
> [ 28.918927] R13: 00000000ffffffff R14: 00007ffd1c9587d0 R15: 0000000000000002
> [ 28.926060] Modules linked in: ip_tables
> [ 28.929986] CR2: 0000000000000000
> mDebian GNU/Linu
> [ 28.933416] ---[ end trace 94e4f9aa3df66098 ]---
> [ 28.939355] RIP: 0010:do_dentry_open+0x1c9/0x360
> [ 28.943975] Code: 84 82 01 00 00 81 ca 00 00 04 00 89 53 44 48 8b 83 f0 00 00 00 81 63 40 3f fc ff ff 48 8d bb 98 00 00 00 c7 43 34 00 00 00 00 <48> 8b 00 48 8b 70 30 e8 2b cb f4 ff f6 43 41 40 74 5a 48 8b 83 f0
> [ 28.962721] RSP: 0018:ffffc9000006fcc8 EFLAGS: 00010206
> [ 28.967948] RAX: 0000000000000000 RBX: ffff8881502ad400 RCX: 0000000000000000
> [ 28.975079] RDX: 00000000000a201d RSI: ffffffff8284d260 RDI: ffff8881502ad498
> [ 28.982211] RBP: ffff88a485a06490 R08: 0000000000000000 R09: ffffffff8284d260
> [ 28.989337] R10: ffffc9000006fcc8 R11: 756c6176006d656d R12: 0000000000000000
> [ 28.996467] R13: ffffffff8133ddc0 R14: ffff8881502ad410 R15: ffff8881502ad400
> [ 29.003592] FS: 00007ff54afa1940(0000) GS:ffff888c4f600000(0000) knlGS:0000000000000000
> [ 29.011668] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 29.017409] CR2: 0000000000000000 CR3: 0000000100120003 CR4: 00000000007706f0
> [ 29.024539] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 29.031671] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 29.038804] PKRU: 55555554
> [ 29.041508] Kernel panic - not syncing: Fatal exception
> ACPI MEMORY or I/O RESET_REG.
>
>
> To reproduce:
>
> git clone https://github.com/intel/lkp-tests.git
> cd lkp-tests
> bin/lkp install job.yaml # job file is attached in this email
> bin/lkp run job.yaml
>
>
>
> Thanks,
> [email protected]
>

thanks,
--
John Hubbard
NVIDIA

2020-11-03 10:12:35

by Daniel Vetter

[permalink] [raw]
Subject: Re: [resource] 22b17dc667: Kernel panic - not syncing: Fatal exception

On Mon, Nov 02, 2020 at 10:15:40PM -0800, John Hubbard wrote:
> On 11/2/20 10:06 PM, lkp wrote:
> > Greeting,
> >
> > FYI, we noticed the following commit (built with gcc-9):
> >
> > commit: 22b17dc667d36418ccabb9c668c4b489185fb40a ("[PATCH v5 13/15] resource: Move devmem revoke code to resource framework")
> > url: https://github.com/0day-ci/linux/commits/Daniel-Vetter/follow_pfn-and-other-iomap-races/20201030-181112
> > base: git://linuxtv.org/media_tree.git master
> >
> > in testcase: fsmark
> > version: fsmark-x86_64-3.3-1_20201007
> > with following parameters:
> >
> > iterations: 1x
> > nr_threads: 1t
> > disk: 1BRD_48G
> > fs: f2fs
> > fs2: nfsv4
> > filesize: 4M
> > test_size: 40G
> > sync_method: NoSync
> > cpufreq_governor: performance
> > ucode: 0x5002f01
> >
> > test-description: The fsmark is a file system benchmark to test synchronous write workloads, for example, mail servers workload.
> > test-url: https://sourceforge.net/projects/fsmark/
> >
> >
> > on test machine: 192 threads Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz with 192G memory
> >
> > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >
> >
>
> Yep, this is the same crash that I saw. And the .config also has
>
> # CONFIG_IO_STRICT_DEVMEM is not set
>
> so it all makes sense.

New version is on its way, I "just" need to setup ppc cross compiler for
the kvm part and figure out how to test the media side with the sketch
tfiga provided ...
-Daniel

>
>
> > If you fix the issue, kindly add following tag
> > Reported-by: kernel test robot <[email protected]>
> >
> >
> > [ 28.644165] systemd[1]: RTC configured in localtime, applying delta of 0 minutes to system time.
> >
> > [ 28.699473] #PF: supervisor read access in kernel mode
> > [ 28.704611] #PF: error_code(0x0000) - not-present page
> > [ 28.709749] PGD 0 P4D 0
> > [ 28.712291] Oops: 0000 [#1] SMP NOPTI
> > [ 28.715956] CPU: 0 PID: 1 Comm: systemd Not tainted 5.10.0-rc1-00015-g22b17dc667d3 #1
> > [ 28.723793] RIP: 0010:do_dentry_open+0x1c9/0x360
> > [ 28.728410] Code: 84 82 01 00 00 81 ca 00 00 04 00 89 53 44 48 8b 83 f0 00 00 00 81 63 40 3f fc ff ff 48 8d bb 98 00 00 00 c7 43 34 00 00 00 00 <48> 8b 00 48 8b 70 30 e8 2b cb f4 ff f6 43 41 40 74 5a 48 8b 83 f0
> > [ 28.747157] RSP: 0018:ffffc9000006fcc8 EFLAGS: 00010206
> > [ 28.752380] RAX: 0000000000000000 RBX: ffff8881502ad400 RCX: 0000000000000000
> > [ 28.759506] RDX: 00000000000a201d RSI: ffffffff8284d260 RDI: ffff8881502ad498
> > [ 28.766639] RBP: ffff88a485a06490 R08: 0000000000000000 R09: ffffffff8284d260
> > [ 28.773769] R10: ffffc9000006fcc8 R11: 756c6176006d656d R12: 0000000000000000
> > [ 28.780895] R13: ffffffff8133ddc0 R14: ffff8881502ad410 R15: ffff8881502ad400
> > [ 28.788028] FS: 00007ff54afa1940(0000) GS:ffff888c4f600000(0000) knlGS:0000000000000000
> > [ 28.796113] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 28.801858] CR2: 0000000000000000 CR3: 0000000100120003 CR4: 00000000007706f0
> > [ 28.808983] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [ 28.816114] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > [ 28.823239] PKRU: 55555554
> > [ 28.825952] Call Trace:
> > [ 28.828412] path_openat+0xaa8/0x10a0
> > [ 28.832073] do_filp_open+0x91/0x100
> > [ 28.835653] ? acpi_os_wait_semaphore+0x48/0x80
> > [ 28.840186] ? __check_object_size+0x136/0x160
> > [ 28.844631] do_sys_openat2+0x20d/0x2e0
> > [ 28.848470] do_sys_open+0x44/0x80
> > [ 28.851878] do_syscall_64+0x33/0x40
> > [ 28.855457] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > [ 28.860509] RIP: 0033:0x7ff54c1521ae
> > [ 28.864086] Code: 25 00 00 41 00 3d 00 00 41 00 74 48 48 8d 05 59 65 0d 00 8b 00 85 c0 75 69 89 f2 b8 01 01 00 00 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 0f 87 a6 00 00 00 48 8b 4c 24 28 64 48 33 0c 25
> > [ 28.882833] RSP: 002b:00007ffd1c9586d0 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
> > [ 28.890399] RAX: ffffffffffffffda RBX: 00007ffd1c9587d0 RCX: 00007ff54c1521ae
> > [ 28.897531] RDX: 0000000000080000 RSI: 00007ff54bfa0e5a RDI: 00000000ffffff9c
> > [ 28.904662] RBP: 00007ffd1c9587d8 R08: 000000000000021f R09: 000055f837cf4290
> > [ 28.911796] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000056dd9000
> > [ 28.918927] R13: 00000000ffffffff R14: 00007ffd1c9587d0 R15: 0000000000000002
> > [ 28.926060] Modules linked in: ip_tables
> > [ 28.929986] CR2: 0000000000000000
> > mDebian GNU/Linu
> > [ 28.933416] ---[ end trace 94e4f9aa3df66098 ]---
> > [ 28.939355] RIP: 0010:do_dentry_open+0x1c9/0x360
> > [ 28.943975] Code: 84 82 01 00 00 81 ca 00 00 04 00 89 53 44 48 8b 83 f0 00 00 00 81 63 40 3f fc ff ff 48 8d bb 98 00 00 00 c7 43 34 00 00 00 00 <48> 8b 00 48 8b 70 30 e8 2b cb f4 ff f6 43 41 40 74 5a 48 8b 83 f0
> > [ 28.962721] RSP: 0018:ffffc9000006fcc8 EFLAGS: 00010206
> > [ 28.967948] RAX: 0000000000000000 RBX: ffff8881502ad400 RCX: 0000000000000000
> > [ 28.975079] RDX: 00000000000a201d RSI: ffffffff8284d260 RDI: ffff8881502ad498
> > [ 28.982211] RBP: ffff88a485a06490 R08: 0000000000000000 R09: ffffffff8284d260
> > [ 28.989337] R10: ffffc9000006fcc8 R11: 756c6176006d656d R12: 0000000000000000
> > [ 28.996467] R13: ffffffff8133ddc0 R14: ffff8881502ad410 R15: ffff8881502ad400
> > [ 29.003592] FS: 00007ff54afa1940(0000) GS:ffff888c4f600000(0000) knlGS:0000000000000000
> > [ 29.011668] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 29.017409] CR2: 0000000000000000 CR3: 0000000100120003 CR4: 00000000007706f0
> > [ 29.024539] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [ 29.031671] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > [ 29.038804] PKRU: 55555554
> > [ 29.041508] Kernel panic - not syncing: Fatal exception
> > ACPI MEMORY or I/O RESET_REG.
> >
> >
> > To reproduce:
> >
> > git clone https://github.com/intel/lkp-tests.git
> > cd lkp-tests
> > bin/lkp install job.yaml # job file is attached in this email
> > bin/lkp run job.yaml
> >
> >
> >
> > Thanks,
> > [email protected]
> >
>
> thanks,
> --
> John Hubbard
> NVIDIA

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch