2015-07-13 10:54:18

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCH 0/5] Make vma_is_anonymous() reliable

We rely on ->vm_ops == NULL to detect anonymous VMA but this check is not
always reliable:

- MPX sets ->vm_ops on anonymous VMA;

- many drivers don't set ->vm_ops. See for instance hpet_mmap().

This patchset makes vma_is_anonymous() more reliable and makes few
cleanups around the code.

Kirill A. Shutemov (5):
mm: mark most vm_operations_struct const
x86, mpx: do not set ->vm_ops on mpx VMAs
mm: make sure all file VMAs have ->vm_ops set
mm, madvise: use vma_is_anonymous() to check for anon VMA
mm, memcontrol: use vma_is_anonymous() to check for anon VMA

arch/x86/kernel/vsyscall_64.c | 2 +-
arch/x86/mm/mmap.c | 7 +++++++
arch/x86/mm/mpx.c | 20 +-------------------
drivers/android/binder.c | 2 +-
drivers/gpu/drm/vgem/vgem_drv.c | 2 +-
drivers/hsi/clients/cmt_speech.c | 2 +-
drivers/infiniband/hw/qib/qib_file_ops.c | 2 +-
drivers/infiniband/hw/qib/qib_mmap.c | 2 +-
drivers/media/platform/omap/omap_vout.c | 2 +-
drivers/misc/genwqe/card_dev.c | 2 +-
drivers/staging/android/ion/ion.c | 2 +-
drivers/staging/comedi/comedi_fops.c | 2 +-
drivers/video/fbdev/omap2/omapfb/omapfb-main.c | 2 +-
drivers/xen/gntalloc.c | 2 +-
drivers/xen/gntdev.c | 2 +-
drivers/xen/privcmd.c | 4 ++--
fs/ceph/addr.c | 2 +-
fs/cifs/file.c | 2 +-
mm/madvise.c | 2 +-
mm/memcontrol.c | 2 +-
mm/mmap.c | 8 ++++++++
security/selinux/selinuxfs.c | 2 +-
22 files changed, 36 insertions(+), 39 deletions(-)

--
2.1.4


2015-07-13 10:54:19

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCH 1/5] mm: mark most vm_operations_struct const

With two excetions (drm/qxl and drm/radeon) all vm_operations_struct
structs should be constant.

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/kernel/vsyscall_64.c | 2 +-
drivers/android/binder.c | 2 +-
drivers/gpu/drm/vgem/vgem_drv.c | 2 +-
drivers/hsi/clients/cmt_speech.c | 2 +-
drivers/infiniband/hw/qib/qib_file_ops.c | 2 +-
drivers/infiniband/hw/qib/qib_mmap.c | 2 +-
drivers/media/platform/omap/omap_vout.c | 2 +-
drivers/misc/genwqe/card_dev.c | 2 +-
drivers/staging/android/ion/ion.c | 2 +-
drivers/staging/comedi/comedi_fops.c | 2 +-
drivers/video/fbdev/omap2/omapfb/omapfb-main.c | 2 +-
drivers/xen/gntalloc.c | 2 +-
drivers/xen/gntdev.c | 2 +-
drivers/xen/privcmd.c | 4 ++--
fs/ceph/addr.c | 2 +-
fs/cifs/file.c | 2 +-
security/selinux/selinuxfs.c | 2 +-
17 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 2dcc6ff6fdcc..30f3a83291ad 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -277,7 +277,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma)
{
return "[vsyscall]";
}
-static struct vm_operations_struct gate_vma_ops = {
+static const struct vm_operations_struct gate_vma_ops = {
.name = gate_vma_name,
};
static struct vm_area_struct gate_vma = {
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 6607f3c6ace1..a39e85f9efa9 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2834,7 +2834,7 @@ static int binder_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}

-static struct vm_operations_struct binder_vm_ops = {
+static const struct vm_operations_struct binder_vm_ops = {
.open = binder_vma_open,
.close = binder_vma_close,
.fault = binder_vm_fault,
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 7a207ca547be..27c2c473d9d3 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -125,7 +125,7 @@ static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
}

-static struct vm_operations_struct vgem_gem_vm_ops = {
+static const struct vm_operations_struct vgem_gem_vm_ops = {
.fault = vgem_gem_fault,
.open = drm_gem_vm_open,
.close = drm_gem_vm_close,
diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c
index 4983529a9c6c..914b2c2d1f2d 100644
--- a/drivers/hsi/clients/cmt_speech.c
+++ b/drivers/hsi/clients/cmt_speech.c
@@ -1105,7 +1105,7 @@ static int cs_char_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return 0;
}

-static struct vm_operations_struct cs_char_vm_ops = {
+static const struct vm_operations_struct cs_char_vm_ops = {
.fault = cs_char_vma_fault,
};

diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 725881890c4a..e449e394963f 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -908,7 +908,7 @@ static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return 0;
}

-static struct vm_operations_struct qib_file_vm_ops = {
+static const struct vm_operations_struct qib_file_vm_ops = {
.fault = qib_file_vma_fault,
};

diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c
index 146cf29a2e1d..34927b700b0e 100644
--- a/drivers/infiniband/hw/qib/qib_mmap.c
+++ b/drivers/infiniband/hw/qib/qib_mmap.c
@@ -75,7 +75,7 @@ static void qib_vma_close(struct vm_area_struct *vma)
kref_put(&ip->ref, qib_release_mmap_info);
}

-static struct vm_operations_struct qib_vm_ops = {
+static const struct vm_operations_struct qib_vm_ops = {
.open = qib_vma_open,
.close = qib_vma_close,
};
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index 17b189a81ec5..38a50df8d629 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -876,7 +876,7 @@ static void omap_vout_vm_close(struct vm_area_struct *vma)
vout->mmap_count--;
}

-static struct vm_operations_struct omap_vout_vm_ops = {
+static const struct vm_operations_struct omap_vout_vm_ops = {
.open = omap_vout_vm_open,
.close = omap_vout_vm_close,
};
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index c49d244265ec..70e62d6a3231 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -418,7 +418,7 @@ static void genwqe_vma_close(struct vm_area_struct *vma)
kfree(dma_map);
}

-static struct vm_operations_struct genwqe_vma_ops = {
+static const struct vm_operations_struct genwqe_vma_ops = {
.open = genwqe_vma_open,
.close = genwqe_vma_close,
};
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index b0b96ab31954..2bbfed9acf19 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -997,7 +997,7 @@ static void ion_vm_close(struct vm_area_struct *vma)
mutex_unlock(&buffer->lock);
}

-static struct vm_operations_struct ion_vma_ops = {
+static const struct vm_operations_struct ion_vma_ops = {
.open = ion_vm_open,
.close = ion_vm_close,
.fault = ion_vm_fault,
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index e78ddbe5a954..1649665eb633 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -2127,7 +2127,7 @@ static void comedi_vm_close(struct vm_area_struct *area)
comedi_buf_map_put(bm);
}

-static struct vm_operations_struct comedi_vm_ops = {
+static const struct vm_operations_struct comedi_vm_ops = {
.open = comedi_vm_open,
.close = comedi_vm_close,
};
diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
index 4f0cbb54d4db..d3af01c94a58 100644
--- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c
@@ -1091,7 +1091,7 @@ static void mmap_user_close(struct vm_area_struct *vma)
omapfb_put_mem_region(rg);
}

-static struct vm_operations_struct mmap_user_ops = {
+static const struct vm_operations_struct mmap_user_ops = {
.open = mmap_user_open,
.close = mmap_user_close,
};
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index e53fe191738c..696301d9dc91 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -493,7 +493,7 @@ static void gntalloc_vma_close(struct vm_area_struct *vma)
mutex_unlock(&gref_mutex);
}

-static struct vm_operations_struct gntalloc_vmops = {
+static const struct vm_operations_struct gntalloc_vmops = {
.open = gntalloc_vma_open,
.close = gntalloc_vma_close,
};
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 89274850741b..b112d529a2d1 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -433,7 +433,7 @@ static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT];
}

-static struct vm_operations_struct gntdev_vmops = {
+static const struct vm_operations_struct gntdev_vmops = {
.open = gntdev_vma_open,
.close = gntdev_vma_close,
.find_special_page = gntdev_vma_find_special_page,
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 5a296161d843..56cb13fcbd0e 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -414,7 +414,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
return 0;
}

-static struct vm_operations_struct privcmd_vm_ops;
+static const struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
@@ -605,7 +605,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}

-static struct vm_operations_struct privcmd_vm_ops = {
+static const struct vm_operations_struct privcmd_vm_ops = {
.close = privcmd_close,
.fault = privcmd_fault
};
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e162bcd105ee..9fa7b3812851 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1582,7 +1582,7 @@ out:
return err;
}

-static struct vm_operations_struct ceph_vmops = {
+static const struct vm_operations_struct ceph_vmops = {
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
};
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2ac2d8471393..94f81962368c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3216,7 +3216,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_LOCKED;
}

-static struct vm_operations_struct cifs_file_vm_ops = {
+static const struct vm_operations_struct cifs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = cifs_page_mkwrite,
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index d2787cca1fcb..0538f7034ef2 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -472,7 +472,7 @@ static int sel_mmap_policy_fault(struct vm_area_struct *vma,
return 0;
}

-static struct vm_operations_struct sel_mmap_policy_ops = {
+static const struct vm_operations_struct sel_mmap_policy_ops = {
.fault = sel_mmap_policy_fault,
.page_mkwrite = sel_mmap_policy_fault,
};
--
2.1.4

2015-07-13 10:54:21

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

MPX setups private anonymous mapping, but uses vma->vm_ops too.
This can confuse core VM, as it relies on vm->vm_ops to distinguish
file VMAs from anonymous.

As result we will get SIGBUS, because handle_pte_fault() thinks it's
file VMA without vm_ops->fault and it doesn't know how to handle the
situation properly.

Let's fix that by not setting ->vm_ops.

We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX
flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because
->vm_flags won't match.

The only thing left is name of VMA. I'm not sure if it's part of ABI, or
we can just drop it. The patch keep it by providing arch_vma_name() on x86.

Build tested only.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Thomas Gleixner <[email protected]>
---
arch/x86/mm/mmap.c | 7 +++++++
arch/x86/mm/mpx.c | 20 +-------------------
2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 9d518d693b4b..844b06d67df4 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -126,3 +126,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_MPX)
+ return "[mpx]";
+ return NULL;
+}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index c439ec478216..4d1c11c07fe1 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -18,26 +18,9 @@
#include <asm/processor.h>
#include <asm/fpu-internal.h>

-static const char *mpx_mapping_name(struct vm_area_struct *vma)
-{
- return "[mpx]";
-}
-
-static struct vm_operations_struct mpx_vma_ops = {
- .name = mpx_mapping_name,
-};
-
-static int is_mpx_vma(struct vm_area_struct *vma)
-{
- return (vma->vm_ops == &mpx_vma_ops);
-}
-
/*
* This is really a simplified "vm_mmap". it only handles MPX
* bounds tables (the bounds directory is user-allocated).
- *
- * Later on, we use the vma->vm_ops to uniquely identify these
- * VMAs.
*/
static unsigned long mpx_mmap(unsigned long len)
{
@@ -83,7 +66,6 @@ static unsigned long mpx_mmap(unsigned long len)
ret = -ENOMEM;
goto out;
}
- vma->vm_ops = &mpx_vma_ops;

if (vm_flags & VM_LOCKED) {
up_write(&mm->mmap_sem);
@@ -661,7 +643,7 @@ static int zap_bt_entries(struct mm_struct *mm,
* so stop immediately and return an error. This
* probably results in a SIGSEGV.
*/
- if (!is_mpx_vma(vma))
+ if (!(vma->vm_flags & VM_MPX))
return -EINVAL;

len = min(vma->vm_end, end) - addr;
--
2.1.4

2015-07-13 10:55:35

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCH 3/5] mm: make sure all file VMAs have ->vm_ops set

We rely on vma->vm_ops == NULL to detect anonymous VMA: see
vma_is_anonymous(), but some drivers doesn't set ->vm_ops.

As result we can end up with anonymous page in private file mapping.
That's should not lead to serious misbehaviour, but nevertheless is
wrong.

Let's fix by setting up dummy ->vm_ops for file mmapping if f_op->mmap()
didn't set its own.

The patch also adds sanity check into __vma_link_rb(). It will help
catch broken VMAs which inserted directly into mm_struct via
insert_vm_struct().

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
mm/mmap.c | 8 ++++++++
1 file changed, 8 insertions(+)

diff --git a/mm/mmap.c b/mm/mmap.c
index 30904c16b7d3..4ce7a6f33db0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -612,6 +612,8 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
struct rb_node **rb_link, struct rb_node *rb_parent)
{
+ WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops");
+
/* Update tracking information for the gap following the new vma. */
if (vma->vm_next)
vma_gap_update(vma->vm_next);
@@ -1638,6 +1640,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
*/
WARN_ON_ONCE(addr != vma->vm_start);

+ /* All file mapping must have ->vm_ops set */
+ if (!vma->vm_ops) {
+ static const struct vm_operations_struct dummy_ops = {};
+ vma->vm_ops = &dummy_ops;
+ }
+
addr = vma->vm_start;
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
--
2.1.4

2015-07-13 10:54:20

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCH 4/5] mm, madvise: use vma_is_anonymous() to check for anon VMA

!vma->vm_file is not reliable to detect anon VMA, because not all
drivers bother set it. Let's use vma_is_anonymous() instead.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Minchan Kim <[email protected]>
---
mm/madvise.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 70ce0d425d72..a4fae076f61d 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -393,7 +393,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
return -EINVAL;

/* MADV_FREE works for only anon vma at the moment */
- if (vma->vm_file)
+ if (!vma_is_anonymous(vma))
return -EINVAL;

start = max(vma->vm_start, start_addr);
--
2.1.4

2015-07-13 10:54:45

by Kirill A. Shutemov

[permalink] [raw]
Subject: [PATCH 5/5] mm, memcontrol: use vma_is_anonymous() to check for anon VMA

!vma->vm_file is not reliable to detect anon VMA, because not all
drivers bother set it. Let's use vma_is_anonymous() instead.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
---
mm/memcontrol.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index acb93c554f6e..a624709f0dd7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4809,7 +4809,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
struct address_space *mapping;
pgoff_t pgoff;

- if (!vma->vm_file) /* anonymous vma */
+ if (vma_is_anonymous(vma)) /* anonymous vma */
return NULL;
if (!(mc.flags & MOVE_FILE))
return NULL;
--
2.1.4

2015-07-13 12:06:52

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

Hi Kirill,

On Mon, Jul 13, 2015 at 1:54 PM, Kirill A. Shutemov
<[email protected]> wrote:
>
> MPX setups private anonymous mapping, but uses vma->vm_ops too.
> This can confuse core VM, as it relies on vm->vm_ops to distinguish
> file VMAs from anonymous.
>
> As result we will get SIGBUS, because handle_pte_fault() thinks it's
> file VMA without vm_ops->fault and it doesn't know how to handle the
> situation properly.
>
> Let's fix that by not setting ->vm_ops.
>
> We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX
> flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because
> ->vm_flags won't match.
>
> The only thing left is name of VMA. I'm not sure if it's part of ABI, or
> we can just drop it. The patch keep it by providing arch_vma_name() on x86.
>
> Build tested only.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Cc: Dave Hansen <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> ---
> arch/x86/mm/mmap.c | 7 +++++++
> arch/x86/mm/mpx.c | 20 +-------------------
> 2 files changed, 8 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
> index 9d518d693b4b..844b06d67df4 100644
> --- a/arch/x86/mm/mmap.c
> +++ b/arch/x86/mm/mmap.c
> @@ -126,3 +126,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
> mm->get_unmapped_area = arch_get_unmapped_area_topdown;
> }
> }
> +
> +const char *arch_vma_name(struct vm_area_struct *vma)
> +{
> + if (vma->vm_flags & VM_MPX)
> + return "[mpx]";
> + return NULL;
> +}

I sure that I'm missing something important. This function stores
"[mpx]" string on this function stack and returns the pointer to that
address. In current flow, this address is visible and accessible,
however in can be a different in general case.

> diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
> index c439ec478216..4d1c11c07fe1 100644
> --- a/arch/x86/mm/mpx.c
> +++ b/arch/x86/mm/mpx.c
> @@ -18,26 +18,9 @@
> #include <asm/processor.h>
> #include <asm/fpu-internal.h>
>
> -static const char *mpx_mapping_name(struct vm_area_struct *vma)
> -{
> - return "[mpx]";
> -}
> -
> -static struct vm_operations_struct mpx_vma_ops = {
> - .name = mpx_mapping_name,
> -};
> -
> -static int is_mpx_vma(struct vm_area_struct *vma)
> -{
> - return (vma->vm_ops == &mpx_vma_ops);
> -}
> -
> /*
> * This is really a simplified "vm_mmap". it only handles MPX
> * bounds tables (the bounds directory is user-allocated).
> - *
> - * Later on, we use the vma->vm_ops to uniquely identify these
> - * VMAs.
> */
> static unsigned long mpx_mmap(unsigned long len)
> {
> @@ -83,7 +66,6 @@ static unsigned long mpx_mmap(unsigned long len)
> ret = -ENOMEM;
> goto out;
> }
> - vma->vm_ops = &mpx_vma_ops;
>
> if (vm_flags & VM_LOCKED) {
> up_write(&mm->mmap_sem);
> @@ -661,7 +643,7 @@ static int zap_bt_entries(struct mm_struct *mm,
> * so stop immediately and return an error. This
> * probably results in a SIGSEGV.
> */
> - if (!is_mpx_vma(vma))
> + if (!(vma->vm_flags & VM_MPX))
> return -EINVAL;
>
> len = min(vma->vm_end, end) - addr;
> --
> 2.1.4
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [email protected]. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"[email protected]"> [email protected] </a>




--
Leon Romanovsky | Independent Linux Consultant
http://www.leon.nu | [email protected]

2015-07-13 12:30:10

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

Leon Romanovsky wrote:
> Hi Kirill,
>
> On Mon, Jul 13, 2015 at 1:54 PM, Kirill A. Shutemov
> <[email protected]> wrote:
> >
> > MPX setups private anonymous mapping, but uses vma->vm_ops too.
> > This can confuse core VM, as it relies on vm->vm_ops to distinguish
> > file VMAs from anonymous.
> >
> > As result we will get SIGBUS, because handle_pte_fault() thinks it's
> > file VMA without vm_ops->fault and it doesn't know how to handle the
> > situation properly.
> >
> > Let's fix that by not setting ->vm_ops.
> >
> > We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX
> > flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because
> > ->vm_flags won't match.
> >
> > The only thing left is name of VMA. I'm not sure if it's part of ABI, or
> > we can just drop it. The patch keep it by providing arch_vma_name() on x86.
> >
> > Build tested only.
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > Cc: Dave Hansen <[email protected]>
> > Cc: Andy Lutomirski <[email protected]>
> > Cc: Thomas Gleixner <[email protected]>
> > ---
> > arch/x86/mm/mmap.c | 7 +++++++
> > arch/x86/mm/mpx.c | 20 +-------------------
> > 2 files changed, 8 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
> > index 9d518d693b4b..844b06d67df4 100644
> > --- a/arch/x86/mm/mmap.c
> > +++ b/arch/x86/mm/mmap.c
> > @@ -126,3 +126,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
> > mm->get_unmapped_area = arch_get_unmapped_area_topdown;
> > }
> > }
> > +
> > +const char *arch_vma_name(struct vm_area_struct *vma)
> > +{
> > + if (vma->vm_flags & VM_MPX)
> > + return "[mpx]";
> > + return NULL;
> > +}
>
> I sure that I'm missing something important. This function stores
> "[mpx]" string on this function stack and returns the pointer to that
> address. In current flow, this address is visible and accessible,
> however in can be a different in general case.

The string is not on stack. String literals are in .rodata and caller is
not allowed to modify it since it's "const char *".

--
Kirill

2015-07-13 12:42:49

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

On Mon, Jul 13, 2015 at 3:29 PM, Kirill A. Shutemov
<[email protected]> wrote:
> Leon Romanovsky wrote:
>> Hi Kirill,
>>
>> On Mon, Jul 13, 2015 at 1:54 PM, Kirill A. Shutemov
>> <[email protected]> wrote:
>> >
>> > MPX setups private anonymous mapping, but uses vma->vm_ops too.
>> > This can confuse core VM, as it relies on vm->vm_ops to distinguish
>> > file VMAs from anonymous.
>> >
>> > As result we will get SIGBUS, because handle_pte_fault() thinks it's
>> > file VMA without vm_ops->fault and it doesn't know how to handle the
>> > situation properly.
>> >
>> > Let's fix that by not setting ->vm_ops.
>> >
>> > We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX
>> > flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because
>> > ->vm_flags won't match.
>> >
>> > The only thing left is name of VMA. I'm not sure if it's part of ABI, or
>> > we can just drop it. The patch keep it by providing arch_vma_name() on x86.
>> >
>> > Build tested only.
>> >
>> > Signed-off-by: Kirill A. Shutemov <[email protected]>
>> > Cc: Dave Hansen <[email protected]>
>> > Cc: Andy Lutomirski <[email protected]>
>> > Cc: Thomas Gleixner <[email protected]>
>> > ---
>> > arch/x86/mm/mmap.c | 7 +++++++
>> > arch/x86/mm/mpx.c | 20 +-------------------
>> > 2 files changed, 8 insertions(+), 19 deletions(-)
>> >
>> > diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
>> > index 9d518d693b4b..844b06d67df4 100644
>> > --- a/arch/x86/mm/mmap.c
>> > +++ b/arch/x86/mm/mmap.c
>> > @@ -126,3 +126,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
>> > mm->get_unmapped_area = arch_get_unmapped_area_topdown;
>> > }
>> > }
>> > +
>> > +const char *arch_vma_name(struct vm_area_struct *vma)
>> > +{
>> > + if (vma->vm_flags & VM_MPX)
>> > + return "[mpx]";
>> > + return NULL;
>> > +}
>>
>> I sure that I'm missing something important. This function stores
>> "[mpx]" string on this function stack and returns the pointer to that
>> address. In current flow, this address is visible and accessible,
>> however in can be a different in general case.
>
> The string is not on stack. String literals are in .rodata and caller is
> not allowed to modify it since it's "const char *".
I see, it behaves similiar to global "const char *" variable definition.
Thank you for clarification.

>
> --
> Kirill



--
Leon Romanovsky | Independent Linux Consultant
http://www.leon.nu | [email protected]

2015-07-13 13:08:45

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH 5/5] mm, memcontrol: use vma_is_anonymous() to check for anon VMA

On Mon, Jul 13, 2015 at 01:54:12PM +0300, Kirill A. Shutemov wrote:
> !vma->vm_file is not reliable to detect anon VMA, because not all
> drivers bother set it. Let's use vma_is_anonymous() instead.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Cc: Johannes Weiner <[email protected]>
> Cc: Michal Hocko <[email protected]>
> ---
> mm/memcontrol.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index acb93c554f6e..a624709f0dd7 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4809,7 +4809,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
> struct address_space *mapping;
> pgoff_t pgoff;
>
> - if (!vma->vm_file) /* anonymous vma */
> + if (vma_is_anonymous(vma)) /* anonymous vma */
> return NULL;
> if (!(mc.flags & MOVE_FILE))
> return NULL;

The next line does vma->vm_file->f_mapping, so it had better be !NULL.

It's not about reliably detecting anonymous vs. file, it is about
whether there is a mapping against which we can do find_get_page().

2015-07-13 13:21:45

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 5/5] mm, memcontrol: use vma_is_anonymous() to check for anon VMA

Johannes Weiner wrote:
> On Mon, Jul 13, 2015 at 01:54:12PM +0300, Kirill A. Shutemov wrote:
> > !vma->vm_file is not reliable to detect anon VMA, because not all
> > drivers bother set it. Let's use vma_is_anonymous() instead.
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > Cc: Johannes Weiner <[email protected]>
> > Cc: Michal Hocko <[email protected]>
> > ---
> > mm/memcontrol.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index acb93c554f6e..a624709f0dd7 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -4809,7 +4809,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
> > struct address_space *mapping;
> > pgoff_t pgoff;
> >
> > - if (!vma->vm_file) /* anonymous vma */
> > + if (vma_is_anonymous(vma)) /* anonymous vma */
> > return NULL;
> > if (!(mc.flags & MOVE_FILE))
> > return NULL;
>
> The next line does vma->vm_file->f_mapping, so it had better be !NULL.
>
> It's not about reliably detecting anonymous vs. file, it is about
> whether there is a mapping against which we can do find_get_page().

You're right. This patch is totally broken.

--
Kirill

2015-07-13 16:55:25

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

On 07/13, Kirill A. Shutemov wrote:
>
> We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX
> flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because
> ->vm_flags won't match.

Agreed.

I am wondering if something like the patch below (on top of yours) makes
sense... Not sure, but mpx_mmap() doesn't look nice too, and with this
change we can unexport mmap_region().

Oleg.


arch/x86/mm/mpx.c | 51 +++++++--------------------------------------------
include/linux/mm.h | 3 +++
mm/mmap.c | 16 +++++++++++-----
3 files changed, 21 insertions(+), 49 deletions(-)


diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 4d1c11c..da8b713 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -24,58 +24,21 @@
*/
static unsigned long mpx_mmap(unsigned long len)
{
- unsigned long ret;
- unsigned long addr, pgoff;
+ unsigned long addr, populate;
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;
- struct vm_area_struct *vma;

/* Only bounds table and bounds directory can be allocated here */
if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
return -EINVAL;

down_write(&mm->mmap_sem);
-
- /* Too many mappings? */
- if (mm->map_count > sysctl_max_map_count) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* Obtain the address to map to. we verify (or select) it and ensure
- * that it represents a valid section of the address space.
- */
- addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
- if (addr & ~PAGE_MASK) {
- ret = addr;
- goto out;
- }
-
- vm_flags = VM_READ | VM_WRITE | VM_MPX |
- mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
-
- /* Set pgoff according to addr for anon_vma */
- pgoff = addr >> PAGE_SHIFT;
-
- ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
- if (IS_ERR_VALUE(ret))
- goto out;
-
- vma = find_vma(mm, ret);
- if (!vma) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (vm_flags & VM_LOCKED) {
- up_write(&mm->mmap_sem);
- mm_populate(ret, len);
- return ret;
- }
-
-out:
+ addr = __do_mmap_pgoff(NULL, 0, len, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, &populate, VM_MPX);
up_write(&mm->mmap_sem);
- return ret;
+ if (populate)
+ mm_populate(addr, populate);
+
+ return addr;
}

enum reg_type {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0207ffa..910d475 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1849,6 +1849,9 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo

extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+extern unsigned long __do_mmap_pgoff(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long pgoff, unsigned long *populate, vm_flags_t vm_flags);
extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
unsigned long pgoff, unsigned long *populate);
diff --git a/mm/mmap.c b/mm/mmap.c
index 2185cd9..88bc961 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1247,14 +1247,12 @@ static inline int mlock_future_check(struct mm_struct *mm,
/*
* The caller must hold down_write(&current->mm->mmap_sem).
*/
-
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long __do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flags, unsigned long pgoff,
- unsigned long *populate)
+ unsigned long *populate, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;

*populate = 0;

@@ -1298,7 +1296,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
- vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+ vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

if (flags & MAP_LOCKED)
@@ -1396,6 +1394,14 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
return addr;
}

+unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long pgoff,
+ unsigned long *populate)
+{
+ return __do_mmap_pgoff(file, addr, len, prot, flags, pgoff, populate, 0);
+}
+
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, pgoff)

2015-07-13 17:07:01

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

On 07/13/2015 09:53 AM, Oleg Nesterov wrote:
> On 07/13, Kirill A. Shutemov wrote:
>>
>> We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX
>> flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because
>> ->vm_flags won't match.
>
> Agreed.
>
> I am wondering if something like the patch below (on top of yours) makes
> sense... Not sure, but mpx_mmap() doesn't look nice too, and with this
> change we can unexport mmap_region().

These both look nice to me (and they both cull specialty MPX code which
is excellent). I'll run them through a quick test.

2015-07-15 23:50:40

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH 4/5] mm, madvise: use vma_is_anonymous() to check for anon VMA

On Mon, Jul 13, 2015 at 01:54:11PM +0300, Kirill A. Shutemov wrote:
> !vma->vm_file is not reliable to detect anon VMA, because not all
> drivers bother set it. Let's use vma_is_anonymous() instead.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Cc: Minchan Kim <[email protected]>
Acked-by: Minchan Kim <[email protected]>

Thanks.

2015-07-16 11:05:38

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

Dave Hansen wrote:
> On 07/13/2015 09:53 AM, Oleg Nesterov wrote:
> > On 07/13, Kirill A. Shutemov wrote:
> >>
> >> We don't really need ->vm_ops here: MPX VMA can be detected with VM_MPX
> >> flag. And vma_merge() will not merge MPX VMA with non-MPX VMA, because
> >> ->vm_flags won't match.
> >
> > Agreed.
> >
> > I am wondering if something like the patch below (on top of yours) makes
> > sense... Not sure, but mpx_mmap() doesn't look nice too, and with this
> > change we can unexport mmap_region().
>
> These both look nice to me (and they both cull specialty MPX code which
> is excellent). I'll run them through a quick test.

Any update?

--
Kirill A. Shutemov

2015-07-16 15:54:16

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

On 07/16/2015 04:05 AM, Kirill A. Shutemov wrote:
>> > These both look nice to me (and they both cull specialty MPX code which
>> > is excellent). I'll run them through a quick test.
> Any update?

Both patches look fine to me and test OK. Feel free to add my
acked/tested/etc...

2015-07-16 16:09:50

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mpx: do not set ->vm_ops on mpx VMAs

On Thu, Jul 16, 2015 at 08:53:48AM -0700, Dave Hansen wrote:
> On 07/16/2015 04:05 AM, Kirill A. Shutemov wrote:
> >> > These both look nice to me (and they both cull specialty MPX code which
> >> > is excellent). I'll run them through a quick test.
> > Any update?
>
> Both patches look fine to me and test OK. Feel free to add my
> acked/tested/etc...

Oleg, could you prepare a proper patch with description/signed-off-by?

I'll send updated patchset with all changes to Andrew.

--
Kirill A. Shutemov

2015-07-16 22:27:48

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 0/1] mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()

On 07/16, Kirill A. Shutemov wrote:
>
> On Thu, Jul 16, 2015 at 08:53:48AM -0700, Dave Hansen wrote:
> > On 07/16/2015 04:05 AM, Kirill A. Shutemov wrote:
> > >> > These both look nice to me (and they both cull specialty MPX code which
> > >> > is excellent). I'll run them through a quick test.
> > > Any update?
> >
> > Both patches look fine to me and test OK. Feel free to add my
> > acked/tested/etc...
>
> Oleg, could you prepare a proper patch with description/signed-off-by?
>
> I'll send updated patchset with all changes to Andrew.

With pleasure, please see 1/1.

Changes:

- s/__do_mmap_pgoff/do_mmap/

- update mm/nommu.c too

- make do_mmap_pgoff() inline (perhaps we should kill it),
this also avoids another change in nommu.c

Oleg.

2015-07-16 22:28:07

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 1/1] mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()

Add the additional "vm_flags_t vm_flags" argument to do_mmap_pgoff(),
rename it to do_mmap(), and re-introduce do_mmap_pgoff() as a simple
wrapper on top of do_mmap(). Perhaps we should update the callers of
do_mmap_pgoff() and kill it later.

This way mpx_mmap() can simply call do_mmap(vm_flags => VM_MPX) and
do not play with vm internals.

After this change mmap_region() has a single user outside of mmap.c,
arch/tile/mm/elf.c:arch_setup_additional_pages(). It would be nice
to change arch/tile/ and unexport mmap_region().

Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/mm/mpx.c | 51 +++++++--------------------------------------------
include/linux/mm.h | 12 ++++++++++--
mm/mmap.c | 10 ++++------
mm/nommu.c | 15 ++++++++-------
4 files changed, 29 insertions(+), 59 deletions(-)

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 4d1c11c..fdbd3e0 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -24,58 +24,21 @@
*/
static unsigned long mpx_mmap(unsigned long len)
{
- unsigned long ret;
- unsigned long addr, pgoff;
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;
- struct vm_area_struct *vma;
+ unsigned long addr, populate;

/* Only bounds table and bounds directory can be allocated here */
if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
return -EINVAL;

down_write(&mm->mmap_sem);
-
- /* Too many mappings? */
- if (mm->map_count > sysctl_max_map_count) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* Obtain the address to map to. we verify (or select) it and ensure
- * that it represents a valid section of the address space.
- */
- addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
- if (addr & ~PAGE_MASK) {
- ret = addr;
- goto out;
- }
-
- vm_flags = VM_READ | VM_WRITE | VM_MPX |
- mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
-
- /* Set pgoff according to addr for anon_vma */
- pgoff = addr >> PAGE_SHIFT;
-
- ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
- if (IS_ERR_VALUE(ret))
- goto out;
-
- vma = find_vma(mm, ret);
- if (!vma) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (vm_flags & VM_LOCKED) {
- up_write(&mm->mmap_sem);
- mm_populate(ret, len);
- return ret;
- }
-
-out:
+ addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
up_write(&mm->mmap_sem);
- return ret;
+ if (populate)
+ mm_populate(addr, populate);
+
+ return addr;
}

enum reg_type {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0207ffa..1f0a56e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1849,11 +1849,19 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo

extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate);
+ vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
extern int do_munmap(struct mm_struct *, unsigned long, size_t);

+static inline unsigned long
+do_mmap_pgoff(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long pgoff, unsigned long *populate)
+{
+ return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
+}
+
#ifdef CONFIG_MMU
extern int __mm_populate(unsigned long addr, unsigned long len,
int ignore_errors);
diff --git a/mm/mmap.c b/mm/mmap.c
index 2185cd9..2f36ebc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1247,14 +1247,12 @@ static inline int mlock_future_check(struct mm_struct *mm,
/*
* The caller must hold down_write(&current->mm->mmap_sem).
*/
-
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long pgoff,
- unsigned long *populate)
+ unsigned long flags, vm_flags_t vm_flags,
+ unsigned long pgoff, unsigned long *populate)
{
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;

*populate = 0;

@@ -1298,7 +1296,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
- vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+ vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

if (flags & MAP_LOCKED)
diff --git a/mm/nommu.c b/mm/nommu.c
index e544508..e3026fd 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1271,13 +1271,14 @@ enomem:
/*
* handle mapping creation for uClinux
*/
-unsigned long do_mmap_pgoff(struct file *file,
- unsigned long addr,
- unsigned long len,
- unsigned long prot,
- unsigned long flags,
- unsigned long pgoff,
- unsigned long *populate)
+unsigned long do_mmap(struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long prot,
+ unsigned long flags,
+ vm_flags_t vm_flags,
+ unsigned long pgoff,
+ unsigned long *populate)
{
struct vm_area_struct *vma;
struct vm_region *region;
--
1.5.5.1

2015-07-24 14:39:49

by Paul Gortmaker

[permalink] [raw]
Subject: Re: [PATCH 1/1] mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()

On Thu, Jul 16, 2015 at 6:26 PM, Oleg Nesterov <[email protected]> wrote:
> Add the additional "vm_flags_t vm_flags" argument to do_mmap_pgoff(),
> rename it to do_mmap(), and re-introduce do_mmap_pgoff() as a simple
> wrapper on top of do_mmap(). Perhaps we should update the callers of
> do_mmap_pgoff() and kill it later.

It seems that the version of this patch in linux-next breaks all nommu
builds (m86k, some arm, etc).

mm/nommu.c: In function 'do_mmap':
mm/nommu.c:1248:30: error: 'vm_flags' redeclared as different kind of symbol
mm/nommu.c:1241:15: note: previous definition of 'vm_flags' was here
scripts/Makefile.build:258: recipe for target 'mm/nommu.o' failed

http://kisskb.ellerman.id.au/kisskb/buildresult/12470285/

Bisect says:

31705a3a633bb63683918f055fe6032939672b61 is the first bad commit
commit 31705a3a633bb63683918f055fe6032939672b61
Author: Oleg Nesterov <[email protected]>
Date: Fri Jul 24 09:20:30 2015 +1000

mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()

Paul.
--

>
> This way mpx_mmap() can simply call do_mmap(vm_flags => VM_MPX) and
> do not play with vm internals.
>
> After this change mmap_region() has a single user outside of mmap.c,
> arch/tile/mm/elf.c:arch_setup_additional_pages(). It would be nice
> to change arch/tile/ and unexport mmap_region().
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> arch/x86/mm/mpx.c | 51 +++++++--------------------------------------------
> include/linux/mm.h | 12 ++++++++++--
> mm/mmap.c | 10 ++++------
> mm/nommu.c | 15 ++++++++-------
> 4 files changed, 29 insertions(+), 59 deletions(-)
>
> diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
> index 4d1c11c..fdbd3e0 100644
> --- a/arch/x86/mm/mpx.c
> +++ b/arch/x86/mm/mpx.c
> @@ -24,58 +24,21 @@
> */
> static unsigned long mpx_mmap(unsigned long len)
> {
> - unsigned long ret;
> - unsigned long addr, pgoff;
> struct mm_struct *mm = current->mm;
> - vm_flags_t vm_flags;
> - struct vm_area_struct *vma;
> + unsigned long addr, populate;
>
> /* Only bounds table and bounds directory can be allocated here */
> if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
> return -EINVAL;
>
> down_write(&mm->mmap_sem);
> -
> - /* Too many mappings? */
> - if (mm->map_count > sysctl_max_map_count) {
> - ret = -ENOMEM;
> - goto out;
> - }
> -
> - /* Obtain the address to map to. we verify (or select) it and ensure
> - * that it represents a valid section of the address space.
> - */
> - addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
> - if (addr & ~PAGE_MASK) {
> - ret = addr;
> - goto out;
> - }
> -
> - vm_flags = VM_READ | VM_WRITE | VM_MPX |
> - mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
> -
> - /* Set pgoff according to addr for anon_vma */
> - pgoff = addr >> PAGE_SHIFT;
> -
> - ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
> - if (IS_ERR_VALUE(ret))
> - goto out;
> -
> - vma = find_vma(mm, ret);
> - if (!vma) {
> - ret = -ENOMEM;
> - goto out;
> - }
> -
> - if (vm_flags & VM_LOCKED) {
> - up_write(&mm->mmap_sem);
> - mm_populate(ret, len);
> - return ret;
> - }
> -
> -out:
> + addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
> + MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
> up_write(&mm->mmap_sem);
> - return ret;
> + if (populate)
> + mm_populate(addr, populate);
> +
> + return addr;
> }
>
> enum reg_type {
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0207ffa..1f0a56e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1849,11 +1849,19 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
>
> extern unsigned long mmap_region(struct file *file, unsigned long addr,
> unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
> -extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
> +extern unsigned long do_mmap(struct file *file, unsigned long addr,
> unsigned long len, unsigned long prot, unsigned long flags,
> - unsigned long pgoff, unsigned long *populate);
> + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
> extern int do_munmap(struct mm_struct *, unsigned long, size_t);
>
> +static inline unsigned long
> +do_mmap_pgoff(struct file *file, unsigned long addr,
> + unsigned long len, unsigned long prot, unsigned long flags,
> + unsigned long pgoff, unsigned long *populate)
> +{
> + return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
> +}
> +
> #ifdef CONFIG_MMU
> extern int __mm_populate(unsigned long addr, unsigned long len,
> int ignore_errors);
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 2185cd9..2f36ebc 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1247,14 +1247,12 @@ static inline int mlock_future_check(struct mm_struct *mm,
> /*
> * The caller must hold down_write(&current->mm->mmap_sem).
> */
> -
> -unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
> +unsigned long do_mmap(struct file *file, unsigned long addr,
> unsigned long len, unsigned long prot,
> - unsigned long flags, unsigned long pgoff,
> - unsigned long *populate)
> + unsigned long flags, vm_flags_t vm_flags,
> + unsigned long pgoff, unsigned long *populate)
> {
> struct mm_struct *mm = current->mm;
> - vm_flags_t vm_flags;
>
> *populate = 0;
>
> @@ -1298,7 +1296,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
> * to. we assume access permissions have been handled by the open
> * of the memory object, so we don't do any here.
> */
> - vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
> + vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
> mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
>
> if (flags & MAP_LOCKED)
> diff --git a/mm/nommu.c b/mm/nommu.c
> index e544508..e3026fd 100644
> --- a/mm/nommu.c
> +++ b/mm/nommu.c
> @@ -1271,13 +1271,14 @@ enomem:
> /*
> * handle mapping creation for uClinux
> */
> -unsigned long do_mmap_pgoff(struct file *file,
> - unsigned long addr,
> - unsigned long len,
> - unsigned long prot,
> - unsigned long flags,
> - unsigned long pgoff,
> - unsigned long *populate)
> +unsigned long do_mmap(struct file *file,
> + unsigned long addr,
> + unsigned long len,
> + unsigned long prot,
> + unsigned long flags,
> + vm_flags_t vm_flags,
> + unsigned long pgoff,
> + unsigned long *populate)
> {
> struct vm_area_struct *vma;
> struct vm_region *region;
> --
> 1.5.5.1
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2015-07-27 19:35:00

by Mark Salter

[permalink] [raw]
Subject: Re: [PATCH 1/1] mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()

On Fri, 2015-07-24 at 10:39 -0400, Paul Gortmaker wrote:
> On Thu, Jul 16, 2015 at 6:26 PM, Oleg Nesterov <[email protected]> wrote:
> > Add the additional "vm_flags_t vm_flags" argument to do_mmap_pgoff(),
> > rename it to do_mmap(), and re-introduce do_mmap_pgoff() as a simple
> > wrapper on top of do_mmap(). Perhaps we should update the callers of
> > do_mmap_pgoff() and kill it later.
>
> It seems that the version of this patch in linux-next breaks all nommu
> builds (m86k, some arm, etc).
>
> mm/nommu.c: In function 'do_mmap':
> mm/nommu.c:1248:30: error: 'vm_flags' redeclared as different kind of symbol
> mm/nommu.c:1241:15: note: previous definition of 'vm_flags' was here
> scripts/Makefile.build:258: recipe for target 'mm/nommu.o' failed
>
> http://kisskb.ellerman.id.au/kisskb/buildresult/12470285/
>
> Bisect says:
>
> 31705a3a633bb63683918f055fe6032939672b61 is the first bad commit
> commit 31705a3a633bb63683918f055fe6032939672b61
> Author: Oleg Nesterov <[email protected]>
> Date: Fri Jul 24 09:20:30 2015 +1000
>
> mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()
>
> Paul.

This fixes the build error and runs fine on c6x:

diff --git a/mm/nommu.c b/mm/nommu.c
index 530eea5..af2196e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1245,7 +1245,7 @@ unsigned long do_mmap(struct file *file,
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *rb;
- unsigned long capabilities, vm_flags, result;
+ unsigned long capabilities, result;
int ret;

*populate = 0;
@@ -1263,7 +1263,7 @@ unsigned long do_mmap(struct file *file,

/* we've determined that we can make the mapping, now translate what we
* now know into VMA flags */
- vm_flags = determine_vm_flags(file, prot, flags, capabilities);
+ vm_flags |= determine_vm_flags(file, prot, flags, capabilities);

/* we're going to need to record the mapping */
region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);