LinuxLists.cc - [PATCH] xen: add new hypercall buffer mapping device

2018-06-15 13:18:26

Subject: [PATCH] xen: add new hypercall buffer mapping device

For passing arbitrary data from user land to the Xen hypervisor the
Xen tools today are using mlock()ed buffers. Unfortunately the kernel
might change access rights of such buffers for brief periods of time
e.g. for page migration or compaction, leading to access faults in the
hypervisor, as the hypervisor can't use the locks of the kernel.

In order to solve this problem add a new device node to the Xen privcmd
driver to easily allocate hypercall buffers via mmap(). The memory is
allocated in the kernel and just mapped into user space. Marked as
VM_IO the user mapping will not be subject to page migration et al.

Signed-off-by: Juergen Gross <[email protected]>
---
drivers/xen/Makefile | 2 +-
drivers/xen/privcmd-buf.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++
drivers/xen/privcmd.c | 9 ++
drivers/xen/privcmd.h | 3 +
drivers/xen/xenfs/super.c | 2 +
5 files changed, 231 insertions(+), 1 deletion(-)
create mode 100644 drivers/xen/privcmd-buf.c

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 451e833f5931..48b154276179 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -41,4 +41,4 @@ obj-$(CONFIG_XEN_PVCALLS_FRONTEND) += pvcalls-front.o
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
-xen-privcmd-y := privcmd.o
+xen-privcmd-y := privcmd.o privcmd-buf.o
diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c
new file mode 100644
index 000000000000..71234a8b7e55
--- /dev/null
+++ b/drivers/xen/privcmd-buf.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/******************************************************************************
+ * privcmd-buf.c
+ *
+ * Mmap of hypercall buffers.
+ *
+ * Copyright (c) 2018 Juergen Gross
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "privcmd.h"
+
+MODULE_LICENSE("GPL");
+
+static int limit = 64;
+module_param(limit, int, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
+ "the privcmd-buf device per open file");
+
+struct privcmd_buf_private {
+ struct mutex lock;
+ struct list_head list;
+ unsigned int allocated;
+};
+
+struct privcmd_buf_vma_private {
+ struct privcmd_buf_private *file_priv;
+ struct list_head list;
+ unsigned int users;
+ unsigned int n_pages;
+ struct page *pages[];
+};
+
+static int privcmd_buf_open(struct inode *ino, struct file *file)
+{
+ struct privcmd_buf_private *file_priv;
+
+ file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
+ if (!file_priv)
+ return -ENOMEM;
+
+ mutex_init(&file_priv->lock);
+ INIT_LIST_HEAD(&file_priv->list);
+
+ file->private_data = file_priv;
+
+ return 0;
+}
+
+static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv)
+{
+ unsigned int i;
+
+ vma_priv->file_priv->allocated -= vma_priv->n_pages;
+
+ list_del(&vma_priv->list);
+
+ for (i = 0; i < vma_priv->n_pages; i++)
+ if (vma_priv->pages[i])
+ __free_page(vma_priv->pages[i]);
+
+ kfree(vma_priv);
+}
+
+static int privcmd_buf_release(struct inode *ino, struct file *file)
+{
+ struct privcmd_buf_private *file_priv = file->private_data;
+ struct privcmd_buf_vma_private *vma_priv;
+
+ mutex_lock(&file_priv->lock);
+
+ while (!list_empty(&file_priv->list)) {
+ vma_priv = list_first_entry(&file_priv->list,
+ struct privcmd_buf_vma_private,
+ list);
+ privcmd_buf_vmapriv_free(vma_priv);
+ }
+
+ mutex_unlock(&file_priv->lock);
+
+ kfree(file_priv);
+
+ return 0;
+}
+
+static void privcmd_buf_vma_open(struct vm_area_struct *vma)
+{
+ struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data;
+
+ if (!vma_priv)
+ return;
+
+ mutex_lock(&vma_priv->file_priv->lock);
+ vma_priv->users++;
+ mutex_unlock(&vma_priv->file_priv->lock);
+}
+
+static void privcmd_buf_vma_close(struct vm_area_struct *vma)
+{
+ struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data;
+ struct privcmd_buf_private *file_priv;
+
+ if (!vma_priv)
+ return;
+
+ file_priv = vma_priv->file_priv;
+
+ mutex_lock(&file_priv->lock);
+
+ vma_priv->users--;
+ if (!vma_priv->users)
+ privcmd_buf_vmapriv_free(vma_priv);
+
+ mutex_unlock(&file_priv->lock);
+}
+
+static vm_fault_t privcmd_buf_vma_fault(struct vm_fault *vmf)
+{
+ pr_debug("fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
+ vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
+ vmf->pgoff, (void *)vmf->address);
+
+ return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct privcmd_buf_vm_ops = {
+ .open = privcmd_buf_vma_open,
+ .close = privcmd_buf_vma_close,
+ .fault = privcmd_buf_vma_fault,
+};
+
+static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct privcmd_buf_private *file_priv = file->private_data;
+ struct privcmd_buf_vma_private *vma_priv;
+ unsigned int count = vma_pages(vma);
+ unsigned int i;
+ int ret = 0;
+
+ if (!(vma->vm_flags & VM_SHARED)) {
+ pr_err("Mapping must be shared\n");
+ return -EINVAL;
+ }
+
+ if (file_priv->allocated + count > limit) {
+ pr_err("Mapping limit reached!\n");
+ return -ENOSPC;
+ }
+
+ vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
+ GFP_KERNEL);
+ if (!vma_priv)
+ return -ENOMEM;
+
+ vma_priv->n_pages = count;
+ count = 0;
+ for (i = 0; i < vma_priv->n_pages; i++) {
+ vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vma_priv->pages[i])
+ break;
+ count++;
+ }
+
+ mutex_lock(&file_priv->lock);
+
+ file_priv->allocated += count;
+
+ vma_priv->file_priv = file_priv;
+ vma_priv->users = 1;
+
+ vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_ops = &privcmd_buf_vm_ops;
+ vma->vm_private_data = vma_priv;
+
+ list_add(&vma_priv->list, &file_priv->list);
+
+ if (vma_priv->n_pages != count)
+ ret = -ENOMEM;
+ else
+ for (i = 0; i < vma_priv->n_pages; i++) {
+ ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+ vma_priv->pages[i]);
+ if (ret)
+ break;
+ }
+
+ if (ret)
+ privcmd_buf_vmapriv_free(vma_priv);
+
+ mutex_unlock(&file_priv->lock);
+
+ return ret;
+}
+
+const struct file_operations xen_privcmdbuf_fops = {
+ .owner = THIS_MODULE,
+ .open = privcmd_buf_open,
+ .release = privcmd_buf_release,
+ .mmap = privcmd_buf_mmap,
+};
+EXPORT_SYMBOL_GPL(xen_privcmdbuf_fops);
+
+struct miscdevice xen_privcmdbuf_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "xen/privcmd-buf",
+ .fops = &xen_privcmdbuf_fops,
+};
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 8ae0349d9f0a..7e6e682104dc 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -1007,12 +1007,21 @@ static int __init privcmd_init(void)
pr_err("Could not register Xen privcmd device\n");
return err;
}
+
+ err = misc_register(&xen_privcmdbuf_dev);
+ if (err != 0) {
+ pr_err("Could not register Xen hypercall-buf device\n");
+ misc_deregister(&privcmd_dev);
+ return err;
+ }
+
return 0;
}

static void __exit privcmd_exit(void)
{
misc_deregister(&privcmd_dev);
+ misc_deregister(&xen_privcmdbuf_dev);
}

module_init(privcmd_init);
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
index 14facaeed36f..0dd9f8f67ee3 100644
--- a/drivers/xen/privcmd.h
+++ b/drivers/xen/privcmd.h
@@ -1,3 +1,6 @@
#include <linux/fs.h>

extern const struct file_operations xen_privcmd_fops;
+extern const struct file_operations xen_privcmdbuf_fops;
+
+extern struct miscdevice xen_privcmdbuf_dev;
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 71ddfb4cf61c..d752d0dd3d1d 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -48,6 +48,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
[2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
+ { "privcmd-buf", &xen_privcmdbuf_fops, S_IRUSR|S_IWUSR },
{""},
};

@@ -55,6 +56,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
[2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
+ { "privcmd-buf", &xen_privcmdbuf_fops, S_IRUSR|S_IWUSR },
{ "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR},
{ "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR},
#ifdef CONFIG_XEN_SYMS
--
2.13.7

2018-06-15 14:16:45

by Andrew Cooper

[permalink] [raw]

Subject: Re: [Xen-devel] [PATCH] xen: add new hypercall buffer mapping device

On 15/06/18 14:17, Juergen Gross wrote:
> +MODULE_LICENSE("GPL");
> +
> +static int limit = 64;
> +module_param(limit, int, 0644);
> +MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
> + "the privcmd-buf device per open file");

I have a feeling that, once we try and remove some of the bounce
buffering, 64 pages will be somewhat restricting. In particular,
migration performance will benefit by keeping the logdirty bitmap buffer
persistently mapped, rather than allocated/bounced/deallocated on each
iteration.

However, perhaps 64 is fine for now.

> +static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct privcmd_buf_private *file_priv = file->private_data;
> + struct privcmd_buf_vma_private *vma_priv;
> + unsigned int count = vma_pages(vma);

This will truncate to 0 if anyone tried mmap()ing 8T (if I've done my
calculations correctly) of virtual address space.

> + unsigned int i;
> + int ret = 0;
> +
> + if (!(vma->vm_flags & VM_SHARED)) {
> + pr_err("Mapping must be shared\n");
> + return -EINVAL;
> + }
> +
> + if (file_priv->allocated + count > limit) {

count > limit || (allocated + count) > limit to avoid overflows.

> + pr_err("Mapping limit reached!\n");
> + return -ENOSPC;
> + }
> +
> + vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
> + GFP_KERNEL);
> + if (!vma_priv)
> + return -ENOMEM;
> +
> + vma_priv->n_pages = count;
> + count = 0;
> + for (i = 0; i < vma_priv->n_pages; i++) {
> + vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> + if (!vma_priv->pages[i])
> + break;
> + count++;
> + }
> +
> + mutex_lock(&file_priv->lock);
> +
> + file_priv->allocated += count;
> +
> + vma_priv->file_priv = file_priv;
> + vma_priv->users = 1;
> +
> + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;

Why DONTDUMP? It's just data, and stands a reasonable chance of being
related to the cause of a crash.

> + vma->vm_ops = &privcmd_buf_vm_ops;
> + vma->vm_private_data = vma_priv;
> +
> + list_add(&vma_priv->list, &file_priv->list);
> +
> + if (vma_priv->n_pages != count)
> + ret = -ENOMEM;
> + else
> + for (i = 0; i < vma_priv->n_pages; i++) {
> + ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> + vma_priv->pages[i]);
> + if (ret)
> + break;
> + }
> +
> + if (ret)
> + privcmd_buf_vmapriv_free(vma_priv);
> +
> + mutex_unlock(&file_priv->lock);
> +
> + return ret;
> +}
> +
> +const struct file_operations xen_privcmdbuf_fops = {
> + .owner = THIS_MODULE,
> + .open = privcmd_buf_open,
> + .release = privcmd_buf_release,
> + .mmap = privcmd_buf_mmap,
> +};
> +EXPORT_SYMBOL_GPL(xen_privcmdbuf_fops);
> +
> +struct miscdevice xen_privcmdbuf_dev = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "xen/privcmd-buf",

Sorry to nitpick, but how about naming this just "xen/hypercall" ?

privcmd is currently a rather large security hole because it allows
userspace to have access to all the hypercalls, including the ones which
should be restricted to just the kernel. In the past, a plan has been
floated to slowly replace the use of the raw ioctl() with proper ioctls
for the hypercalls which userspace might reasonably use.
> diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
> index 71ddfb4cf61c..d752d0dd3d1d 100644
> --- a/drivers/xen/xenfs/super.c
> +++ b/drivers/xen/xenfs/super.c
> @@ -48,6 +48,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
> [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
> { "capabilities", &capabilities_file_ops, S_IRUGO },
> { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
> + { "privcmd-buf", &xen_privcmdbuf_fops, S_IRUSR|S_IWUSR },

Do we really need to provide the fallback here? /dev/xen has been
around for ages, and it would really be a good thing if we can
eventually retire xenfs.

~Andrew

2018-06-15 14:36:54

by Jan Beulich

[permalink] [raw]

Subject: Re: [Xen-devel] [PATCH] xen: add new hypercall buffer mapping device

>>> On 15.06.18 at 15:17, <[email protected]> wrote:
> --- /dev/null
> +++ b/drivers/xen/privcmd-buf.c
> @@ -0,0 +1,216 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +
> +/******************************************************************************
> + * privcmd-buf.c
> + *
> + * Mmap of hypercall buffers.
> + *
> + * Copyright (c) 2018 Juergen Gross
> + */
> +
> +#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/list.h>
> +#include <linux/miscdevice.h>
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +
> +#include "privcmd.h"
> +
> +MODULE_LICENSE("GPL");
> +
> +static int limit = 64;
> +module_param(limit, int, 0644);

Can this go negative? If not - "unsigned int" and "uint" perhaps?

> +static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct privcmd_buf_private *file_priv = file->private_data;
> + struct privcmd_buf_vma_private *vma_priv;
> + unsigned int count = vma_pages(vma);
> + unsigned int i;
> + int ret = 0;
> +
> + if (!(vma->vm_flags & VM_SHARED)) {
> + pr_err("Mapping must be shared\n");
> + return -EINVAL;
> + }
> +
> + if (file_priv->allocated + count > limit) {
> + pr_err("Mapping limit reached!\n");

For both error messages - if you really want them, I think they should be
made more helpful such that it is possible to identify the offender. Log at
least process name and pid, or drop the messages?

> + return -ENOSPC;
> + }
> +
> + vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
> + GFP_KERNEL);
> + if (!vma_priv)
> + return -ENOMEM;
> +
> + vma_priv->n_pages = count;
> + count = 0;
> + for (i = 0; i < vma_priv->n_pages; i++) {
> + vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> + if (!vma_priv->pages[i])
> + break;
> + count++;
> + }
> +
> + mutex_lock(&file_priv->lock);
> +
> + file_priv->allocated += count;
> +
> + vma_priv->file_priv = file_priv;
> + vma_priv->users = 1;
> +
> + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
> + vma->vm_ops = &privcmd_buf_vm_ops;
> + vma->vm_private_data = vma_priv;
> +
> + list_add(&vma_priv->list, &file_priv->list);
> +
> + if (vma_priv->n_pages != count)
> + ret = -ENOMEM;
> + else
> + for (i = 0; i < vma_priv->n_pages; i++) {
> + ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
> + vma_priv->pages[i]);
> + if (ret)
> + break;
> + }
> +
> + if (ret)
> + privcmd_buf_vmapriv_free(vma_priv);

Don't you also need to undo the partially successful insertion?

> +struct miscdevice xen_privcmdbuf_dev = {
> + .minor = MISC_DYNAMIC_MINOR,

While dynamic minors are of course much better than fixed ones (as
we used to use many years ago), aren't they still a relatively
limited resource? By setting a "mode" on a handle to the original
privcmd interface, no new minor would be needed.

> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -1007,12 +1007,21 @@ static int __init privcmd_init(void)
> pr_err("Could not register Xen privcmd device\n");
> return err;
> }
> +
> + err = misc_register(&xen_privcmdbuf_dev);
> + if (err != 0) {
> + pr_err("Could not register Xen hypercall-buf device\n");
> + misc_deregister(&privcmd_dev);
> + return err;

Wouldn't this better be a warning only, without failing driver init?

Jan

2018-06-15 14:40:40

by Jürgen Groß

[permalink] [raw]

Subject: Re: [Xen-devel] [PATCH] xen: add new hypercall buffer mapping device

On 15/06/18 16:15, Andrew Cooper wrote:
> On 15/06/18 14:17, Juergen Gross wrote:
>> +MODULE_LICENSE("GPL");
>> +
>> +static int limit = 64;
>> +module_param(limit, int, 0644);
>> +MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
>> + "the privcmd-buf device per open file");
>
> I have a feeling that, once we try and remove some of the bounce
> buffering, 64 pages will be somewhat restricting. In particular,
> migration performance will benefit by keeping the logdirty bitmap buffer
> persistently mapped, rather than allocated/bounced/deallocated on each
> iteration.
>
> However, perhaps 64 is fine for now.
>
>> +static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
>> +{
>> + struct privcmd_buf_private *file_priv = file->private_data;
>> + struct privcmd_buf_vma_private *vma_priv;
>> + unsigned int count = vma_pages(vma);
>
> This will truncate to 0 if anyone tried mmap()ing 8T (if I've done my
> calculations correctly) of virtual address space.

Okay, I'll change the type to unsigned long.

>
>> + unsigned int i;
>> + int ret = 0;
>> +
>> + if (!(vma->vm_flags & VM_SHARED)) {
>> + pr_err("Mapping must be shared\n");
>> + return -EINVAL;
>> + }
>> +
>> + if (file_priv->allocated + count > limit) {
>
> cout > limit || (allocated + count) > limit to avoid overflows.

unsigned long again.

>
>> + pr_err("Mapping limit reached!\n");
>> + return -ENOSPC;
>> + }
>> +
>> + vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
>> + GFP_KERNEL);
>> + if (!vma_priv)
>> + return -ENOMEM;
>> +
>> + vma_priv->n_pages = count;
>> + count = 0;
>> + for (i = 0; i < vma_priv->n_pages; i++) {
>> + vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> + if (!vma_priv->pages[i])
>> + break;
>> + count++;
>> + }
>> +
>> + mutex_lock(&file_priv->lock);
>> +
>> + file_priv->allocated += count;
>> +
>> + vma_priv->file_priv = file_priv;
>> + vma_priv->users = 1;
>> +
>> + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
>
> Why DONTDUMP? Its just data, and stands a reasonable chance of being
> related to the cause of a crash.

Hmm, yes. I'll drop it.

>
>> + vma->vm_ops = &privcmd_buf_vm_ops;
>> + vma->vm_private_data = vma_priv;
>> +
>> + list_add(&vma_priv->list, &file_priv->list);
>> +
>> + if (vma_priv->n_pages != count)
>> + ret = -ENOMEM;
>> + else
>> + for (i = 0; i < vma_priv->n_pages; i++) {
>> + ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
>> + vma_priv->pages[i]);
>> + if (ret)
>> + break;
>> + }
>> +
>> + if (ret)
>> + privcmd_buf_vmapriv_free(vma_priv);
>> +
>> + mutex_unlock(&file_priv->lock);
>> +
>> + return ret;
>> +}
>> +
>> +const struct file_operations xen_privcmdbuf_fops = {
>> + .owner = THIS_MODULE,
>> + .open = privcmd_buf_open,
>> + .release = privcmd_buf_release,
>> + .mmap = privcmd_buf_mmap,
>> +};
>> +EXPORT_SYMBOL_GPL(xen_privcmdbuf_fops);
>> +
>> +struct miscdevice xen_privcmdbuf_dev = {
>> + .minor = MISC_DYNAMIC_MINOR,
>> + .name = "xen/privcmd-buf",
>
> Sorry to nitpick, but how about naming this just "xen/hypercall" ?

I really have no special preferences here.

> privcmd is currently a rather large security hole because it allows
> userspace to have access to all the hypercalls, including the ones which
> should be restricted to just the kernel. In the past, a plan has been
> floated to slowly replace the use of the raw ioctl() with proper ioctls
> for the hypercalls which userspace might reasonably use.

I'd rather let the privcmd driver either ask the hypervisor which
hypercalls are fine to call from user mode, or let it encapsulate
the hypercall in a new "user hypercall" which the hypervisor can
verify then.

>> diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
>> index 71ddfb4cf61c..d752d0dd3d1d 100644
>> --- a/drivers/xen/xenfs/super.c
>> +++ b/drivers/xen/xenfs/super.c
>> @@ -48,6 +48,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
>> [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
>> { "capabilities", &capabilities_file_ops, S_IRUGO },
>> { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
>> + { "privcmd-buf", &xen_privcmdbuf_fops, S_IRUSR|S_IWUSR },
>
> Do we really need to provide the fallback here? /dev/xen has been
> around for ages, and it would really be a good thing if we can
> eventually retire xenfs.

I'd be fine dropping it.

Just did some archaeology: /dev/xen is supported since Xen 4.5. Do we
really want to drop support of older Xen versions in the Linux kernel?

Juergen

2018-06-15 14:44:09

by Boris Ostrovsky

[permalink] [raw]

Subject: Re: [PATCH] xen: add new hypercall buffer mapping device

On 06/15/2018 09:17 AM, Juergen Gross wrote:
> +static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct privcmd_buf_private *file_priv = file->private_data;
> + struct privcmd_buf_vma_private *vma_priv;
> + unsigned int count = vma_pages(vma);
> + unsigned int i;
> + int ret = 0;
> +
> + if (!(vma->vm_flags & VM_SHARED)) {
> + pr_err("Mapping must be shared\n");
> + return -EINVAL;
> + }
> +
> + if (file_priv->allocated + count > limit) {
> + pr_err("Mapping limit reached!\n");
> + return -ENOSPC;

This error (which I thought should have been E2BIG) is not in the list
of allowed error codes (per man page). I think it's either EINVAL or
ENOMEM (EINVAL seems more appropriate to me).

I am also not sure about pr_err as the caller can force it (although
presumably the file is only accessible to superuser).

-boris

2018-06-15 15:26:48

by Jürgen Groß

[permalink] [raw]

Subject: Re: [PATCH] xen: add new hypercall buffer mapping device

On 15/06/18 16:43, Boris Ostrovsky wrote:
> On 06/15/2018 09:17 AM, Juergen Gross wrote:
>> +static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
>> +{
>> + struct privcmd_buf_private *file_priv = file->private_data;
>> + struct privcmd_buf_vma_private *vma_priv;
>> + unsigned int count = vma_pages(vma);
>> + unsigned int i;
>> + int ret = 0;
>> +
>> + if (!(vma->vm_flags & VM_SHARED)) {
>> + pr_err("Mapping must be shared\n");
>> + return -EINVAL;
>> + }
>> +
>> + if (file_priv->allocated + count > limit) {
>> + pr_err("Mapping limit reached!\n");
>> + return -ENOSPC;
>
>
> This error (which I thought should have been E2BIG) is not in the list
> of allowed error codes (per man page). I think it it's either EINVAL or
> ENOMEM (EINVAL seems more appropriate to me).

Okay.

> I am also not sure about pr_err as the caller can force it (although
> presumably the file is only accessible to superuser).

I've already replied to Jan: I'll drop the messages.

Juergen